diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5aa9b0ca..a2ea67ef 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -61,7 +61,7 @@ install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/py"
PATTERN "*.pyc" EXCLUDE
PATTERN ".gitignore" EXCLUDE)
-set(tools "igrec.py;mass_spectra_analyzer.py;dense_subgraph_finder.py;igquast.py;barcoded_igrec.py;diversity_analyzer.py")
+set(tools igrec.py mass_spectra_analyzer.py dense_subgraph_finder.py igquast.py barcoded_igrec.py diversity_analyzer.py ig_simulator.py) # entry-point scripts; each one is passed to install(PROGRAMS ...) by the foreach loop below
foreach(tool ${tools})
install(PROGRAMS ${tool}
DESTINATION "${INSTALL_DIR}"
diff --git a/Makefile b/Makefile
index 8071db42..03ace38c 100644
--- a/Makefile
+++ b/Makefile
@@ -48,6 +48,9 @@ cdr: cmake
umi: cmake
$(MAKE) -C build/release umi_correction_stats umi_graph umi_naive umi_to_fastq
+igs: cmake # after the cmake prerequisite, build only the ig_simulator target inside build/release
+ $(MAKE) -C build/release ig_simulator
+
clean:
-rm -r build
@@ -55,4 +58,5 @@ clean_tests:
-rm *.pyc
-rm -r igrec_test
-rm -r ms_analyzer_test
+ -rm -r ig_simulator_test
-rm *~
diff --git a/configs/ig_simulator/config.info b/configs/ig_simulator/config.info
new file mode 100644
index 00000000..0a870023
--- /dev/null
+++ b/configs/ig_simulator/config.info
@@ -0,0 +1,107 @@
+io_params {
+ input_params {
+ germline_input {
+ ig_dir IG_antevolo
+ tcr_dir TCR
+ germline_filenames_config ./configs/vj_finder/germline_files_config.txt
+ }
+ cdr_labeler_config_filename ./configs/cdr_labeler/config.info
+ }
+
+ output_params {
+ output_dir ig_simulator_test
+ log_filename log.properties
+ base_repertoire_filename base_repertoire.fasta
+ base_repertoire_info base_repertoire.info
+ filtered_pool filtered_pool.fasta
+ full_pool full_pool.fasta
+ trees_dir trees_dir
+ }
+}
+
+germline_params {
+ organism human
+ loci IGH
+ pseudogenes false
+ germline_dir ./data/germline
+}
+
+simulation_params {
+ base_repertoire_params {
+ metaroot_simulation_params {
+ gene_chooser_params {
+ gene_chooser_method uniform
+ }
+
+ nucleotides_remover_params {
+ nucleotides_remover_method uniform
+ uniform_remover_params {
+ max_remove_v_gene 20
+ max_remove_d_gene_left 5
+ max_remove_d_gene_right 5
+ max_remove_j_gene 10
+ }
+ }
+
+ p_nucleotides_creator_params {
+ p_nucleotides_creator_method uniform
+ uniform_creator_params {
+ max_create_v_gene 5
+ max_create_d_gene_left 3
+ max_create_d_gene_right 3
+ max_create_j_gene 3
+ }
+ }
+
+ n_nucleotides_inserter_params {
+ n_nucleotides_method uniform
+ uniform_inserter_params {
+ max_vj_insertion 10
+ max_vd_insertion 21
+ max_dj_insertion 23
+ }
+ }
+
+ cleavage_params {
+ prob_cleavage_v 0.5
+ prob_cleavage_d_left 0.5
+ prob_cleavage_d_right 0.5
+ prob_cleavage_j 0.5
+ }
+ }
+
+ multiplicity_creator_params {
+ multiplicity_method geometric
+ geometric_params {
+ lambda 0.1
+ }
+ }
+
+ productive_params {
+ productive_part 1
+ }
+
+ number_of_metaroots 10
+ }
+
+ clonal_tree_simulator_params {
+ tree_size_generator_params {
+ tree_size_generator_method geometric
+ geometric_params {
+ lambda 0.001
+ }
+ }
+
+ shm_creator_params {
+ shm_creator_method poisson
+ poisson_params {
+ lambda 2
+ }
+ }
+
+ pool_manager_strategy wide ; one of: uniform, wide, deep
+ prob_ret_to_pool 0.9
+ lambda_distr_n_children 0.3
+
+ }
+}
diff --git a/configs/ig_simulator/config.info.template b/configs/ig_simulator/config.info.template
new file mode 100644
index 00000000..0a870023
--- /dev/null
+++ b/configs/ig_simulator/config.info.template
@@ -0,0 +1,107 @@
+io_params {
+ input_params {
+ germline_input {
+ ig_dir IG_antevolo
+ tcr_dir TCR
+ germline_filenames_config ./configs/vj_finder/germline_files_config.txt
+ }
+ cdr_labeler_config_filename ./configs/cdr_labeler/config.info
+ }
+
+ output_params {
+ output_dir ig_simulator_test
+ log_filename log.properties
+ base_repertoire_filename base_repertoire.fasta
+ base_repertoire_info base_repertoire.info
+ filtered_pool filtered_pool.fasta
+ full_pool full_pool.fasta
+ trees_dir trees_dir
+ }
+}
+
+germline_params {
+ organism human
+ loci IGH
+ pseudogenes false
+ germline_dir ./data/germline
+}
+
+simulation_params {
+ base_repertoire_params {
+ metaroot_simulation_params {
+ gene_chooser_params {
+ gene_chooser_method uniform
+ }
+
+ nucleotides_remover_params {
+ nucleotides_remover_method uniform
+ uniform_remover_params {
+ max_remove_v_gene 20
+ max_remove_d_gene_left 5
+ max_remove_d_gene_right 5
+ max_remove_j_gene 10
+ }
+ }
+
+ p_nucleotides_creator_params {
+ p_nucleotides_creator_method uniform
+ uniform_creator_params {
+ max_create_v_gene 5
+ max_create_d_gene_left 3
+ max_create_d_gene_right 3
+ max_create_j_gene 3
+ }
+ }
+
+ n_nucleotides_inserter_params {
+ n_nucleotides_method uniform
+ uniform_inserter_params {
+ max_vj_insertion 10
+ max_vd_insertion 21
+ max_dj_insertion 23
+ }
+ }
+
+ cleavage_params {
+ prob_cleavage_v 0.5
+ prob_cleavage_d_left 0.5
+ prob_cleavage_d_right 0.5
+ prob_cleavage_j 0.5
+ }
+ }
+
+ multiplicity_creator_params {
+ multiplicity_method geometric
+ geometric_params {
+ lambda 0.1
+ }
+ }
+
+ productive_params {
+ productive_part 1
+ }
+
+ number_of_metaroots 10
+ }
+
+ clonal_tree_simulator_params {
+ tree_size_generator_params {
+ tree_size_generator_method geometric
+ geometric_params {
+ lambda 0.001
+ }
+ }
+
+ shm_creator_params {
+ shm_creator_method poisson
+ poisson_params {
+ lambda 2
+ }
+ }
+
+ pool_manager_strategy wide ; one of: uniform, wide, deep
+ prob_ret_to_pool 0.9
+ lambda_distr_n_children 0.3
+
+ }
+}
diff --git a/data/germline/human/IG_antevolo/IGHD.fa b/data/germline/human/IG_antevolo/IGHD.fa
new file mode 100644
index 00000000..42135f89
--- /dev/null
+++ b/data/germline/human/IG_antevolo/IGHD.fa
@@ -0,0 +1,54 @@
+>IGHD1-1*01
+ggtacaactggaacgac
+>IGHD1-14*01
+ggtataaccggaaccac
+>IGHD1-20*01
+ggtataactggaacgac
+>IGHD1-26*01
+ggtatagtgggagctactac
+>IGHD1-7*01
+ggtataactggaactac
+>IGHD2-15*01
+aggatattgtagtggtggtagctgctactcc
+>IGHD2-2*01
+aggatattgtagtagtaccagctgctatgcc
+>IGHD2-21*01
+agcatattgtggtggtgattgctattcc
+>IGHD2-8*01
+aggatattgtactaatggtgtatgctatacc
+>IGHD3-10*01
+gtattactatggttcggggagttattataac
+>IGHD3-16*01
+gtattatgattacgtttgggggagttatgcttatacc
+>IGHD3-22*01
+gtattactatgatagtagtggttattactac
+>IGHD3-3*01
+gtattacgatttttggagtggttattatacc
+>IGHD3-9*01
+gtattacgatattttgactggttattataac
+>IGHD4-11*01
+tgactacagtaactac
+>IGHD4-17*01
+tgactacggtgactac
+>IGHD4-23*01
+tgactacggtggtaactcc
+>IGHD4-4*01
+tgactacagtaactac
+>IGHD5-12*01
+gtggatatagtggctacgattac
+>IGHD5-18*01
+gtggatacagctatggttac
+>IGHD5-24*01
+gtagagatggctacaattac
+>IGHD5-5*01
+gtggatacagctatggttac
+>IGHD6-13*01
+gggtatagcagcagctggtac
+>IGHD6-19*01
+gggtatagcagtggctggtac
+>IGHD6-25*01
+gggtatagcagcggctac
+>IGHD6-6*01
+gagtatagcagctcgtcc
+>IGHD7-27*01
+ctaactgggga
diff --git a/data/germline/human/IG_antevolo/IGHJ.fa b/data/germline/human/IG_antevolo/IGHJ.fa
new file mode 100644
index 00000000..26c1ce32
--- /dev/null
+++ b/data/germline/human/IG_antevolo/IGHJ.fa
@@ -0,0 +1,12 @@
+>IGHJ1*01
+gctgaatacttccagcactggggccagggcaccctggtcaccgtctcctcag
+>IGHJ2*01
+ctactggtacttcgatctctggggccgtggcaccctggtcactgtctcctcag
+>IGHJ3*01
+tgatgcttttgatgtctggggccaagggacaatggtcaccgtctcttcag
+>IGHJ4*01
+actactttgactactggggccaaggaaccctggtcaccgtctcctcag
+>IGHJ5*01
+acaactggttcgactcctggggccaaggaaccctggtcaccgtctcctcag
+>IGHJ6*01
+attactactactactacggtatggacgtctggggcaaagggaccacggtcaccgtctcctcag
diff --git a/data/germline/human/IG_antevolo/IGHV.fa b/data/germline/human/IG_antevolo/IGHV.fa
new file mode 100644
index 00000000..29732486
--- /dev/null
+++ b/data/germline/human/IG_antevolo/IGHV.fa
@@ -0,0 +1,170 @@
+>IGHV1-18*01
+caggttcagctggtgcagtctggagctgaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacacctttaccagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttacaatggtaacacaaactatgcacagaagctccagggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga
+>IGHV1-2*01
+caggtgcagctggtgcagtctggggctgaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttcaccggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaacagtggtggcacaaactatgcacagaagtttcagggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga
+>IGHV1-24*01
+caggtccagctggtacagtctggggctgaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggtttccggatacaccctcactgaattatccatgcactgggtgcgacaggctcctggaaaagggcttgagtggatgggaggttttgatcctgaagatggtgaaacaatctacgcacagaagttccagggcagagtcaccatgaccgaggacacatctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga
+>IGHV1-3*01
+caggtccagcttgtgcagtctggggctgaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttcactagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggatcaacgctggcaatggtaacacaaaatattcacagaagttccagggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaagacacggctgtgtattactgtgcgagaga
+>IGHV1-38-4*01
+caggtccagctggtgcagtcttgggctgaggtgaggaagtctggggcctcagtgaaagtctcctgtagtttttctgggtttaccatcaccagctacggtatacattgggtgcaacagtcccctggacaagggcttgagtggatgggatggatcaaccctggcaatggtagcccaagctatgccaagaagtttcagggcagattcaccatgaccagggacatgtccacaaccacagcctacacagacctgagcagcctgacatctgaggacatggctgtgtattactatgcaagaca
+>IGHV1-45*01
+cagatgcagctggtgcagtctggggctgaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttcacctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttcaatggtaacaccaactacgcacagaaattccaggacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagata
+>IGHV1-46*01
+caggtgcagctggtgcagtctggggctgaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttcaccagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagtggtggtagcacaagctacgcacagaagttccagggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1-58*01
+caaatgcagctggtgcagtctgggcctgaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcacctttactagctctgctgtgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggcagtggtaacacaaactacgcacagaagttccaggaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga
+>IGHV1-68*01
+caggtgcagctggggcagtctgaggctgaggtaaagaagcctggggcctcagtgaaggtctcctgcaaggcttccggatacaccttcacttgctgctccttgcactggttgcaacaggcccctggacaagggcttgaaaggatgagatggatcacactttacaatggtaacaccaactatgcaaagaagttccagggcagagtcaccattaccagggacatgtccctgaggacagcctacatagagctgagcagcctgagatctgaggactcggctgtgtattactgggcaagata
+>IGHV1-69*01
+caggtccagctggtgcagtctggggctgaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttcagcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatctttggtacagcaaactacgcacagaagttccagggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1-69-2*01
+gaggtccagctggtacagtctggggctgaggtgaagaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttcaccgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaagatggtgaaacaatatacgcagagaagttccagggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga
+>IGHV1-69D*01
+caggtgcagctggtgcagtctggggctgaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttcagcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatctttggtacagcaaactacgcacagaagttccagggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1-8*01
+caggtgcagctggtgcagtctggggctgaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttcaccagttatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaacagtggtaacacaggctatgcacagaagttccagggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg
+>IGHV1-NL1*01
+caggttcagctgttgcagcctggggtccaggtgaagaagcctgggtcctcagtgaaggtctcctgctaggcttccagatacaccttcaccaaatactttacacggtgggtgtgacaaagccctggacaagggcatnagtggatgggatgaatcaacccttacaacgataacacacactacgcacagacgttctggggcagagtcaccattaccagtgacaggtccatgagcacagcctacatggagctgagcngcctgagatccgaagacatggtcgtgtattactgtgtgagaga
+>IGHV2-10*01
+caggtcaccttgaaggagtctggtcctgcactggtgaaacccacacagaccctcatgctgacctgcaccttctctgggttctcactcagcacttctggaatgggtgtgggttagatctgtcagccctcagcaaaggccctggagtggcttgcacacatttattagaatgataataaatactacagcccatctctgaagagtaggctcattatctccaaggacacctccaagaatgaagtggttctaacagtgatcaacatggacattgtggacacagccacacattactgtgcaaggagac
+>IGHV2-26*01
+caggtcaccttgaaggagtctggtcctgtgctggtgaaacccacagagaccctcacgctgacctgcaccgtctctgggttctcactcagcaatgctagaatgggtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacacattttttcgaatgacgaaaaatcctacagcacatctctgaagagcaggctcaccatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacatattactgtgcacggatac
+>IGHV2-5*01
+cagatcaccttgaaggagtctggtcctacgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagcactagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggatgatgataagcgctacagcccatctctgaagagcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
+>IGHV2-70*01
+caggtcaccttgaaggagtctggtcctgcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagcactagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggatgatgataaatactacagcacatctctgaagaccaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
+>IGHV2-70D*01
+caggtcaccttgaaggagtctggtcctgcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagcactagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggatgatgataaattctacagcacatctctgaagaccaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
+>IGHV3-11*01
+caggtgcagctggtggagtctgggggaggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttcagtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagtagtagttacacaaactacgcagactctgtgaagggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
+>IGHV3-13*01
+gaggtgcagctggtggagtctgggggaggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttcagtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgctggtgacacatactatccaggctccgtgaagggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
+>IGHV3-15*01
+gaggtgcagctggtggagtctgggggaggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttcagtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaaggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
+>IGHV3-16*01
+gaggtacaactggtggagtctgggggaggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttcagtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaatggcagtaggacgcactatgtggactccgtgaagcgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa
+>IGHV3-19*01
+acagtgcagctggtggagtctgggggaggcttggtagagcctggggggtccctgagactctcctgtgcagcctctggattcaccttcagtaacagtgacatgaactgggtccgccaggctccaggaaaggggctggagtgggtatcgggtgttagttggaatggcagtaggacgcactatgcagactctgtgaagggccgattcatcatctccagagacaattccaggaacttcctgtatcagcaaatgaacagcctgaggcccgaggacatggctgtgtattactgtgtgagaaa
+>IGHV3-20*01
+gaggtgcagctggtggagtctgggggaggtgtggtacggcctggggggtccctgagactctcctgtgcagcctctggattcacctttgatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaatggtggtagcacaggttatgcagactctgtgaagggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga
+>IGHV3-21*01
+gaggtgcagctggtggagtctgggggaggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttcagtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagtagtagttacatatactacgcagactcagtgaagggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-22*01
+gaggtgcatctggtggagtctgggggagccttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttcagttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaaggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga
+>IGHV3-23*01
+gaggtgcagctgttggagtctgggggaggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcacctttagcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagtggtggtagcacatactacgcagactccgtgaagggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
+>IGHV3-23D*01
+gaggtgcagctgttggagtctgggggaggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcacctttagcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagtggtggtagcacatactacgcagactccgtgaagggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
+>IGHV3-25*01
+gagatgcagctggtggagtctgggggaggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttcagtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaatgggggtagcacatacctcatagactccggtaaggaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga
+>IGHV3-29*01
+gaggtggagctgatagagcccacagaggacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttcagtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgatggaagtcagatacaccatgcagactctgtgaagggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt
+>IGHV3-30*01
+caggtgcagctggtggagtctgggggaggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttcagtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgatggaagtaataaatactacgcagactccgtgaagggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30-2*01
+gaggtacagctcgtggagtccggagaggacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttcagtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgatggaagtcagatatgttatgcataatctttgaagagcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgaggca
+>IGHV3-30-22*01
+gaggtggagctgatagagtccatagaggacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttcagtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgatggaagtcagatacaccatgcagactctgtgaagggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagagctgaggacatggacgtgtatggctgtacataaggtc
+>IGHV3-30-3*01
+caggtgcagctggtggagtctgggggaggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttcagtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgatggaagcaataaatactacgcagactccgtgaagggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30-33*01
+gaggtacagctcgtggagtccggagaggacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttcagtagctactgaaggagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgatggaagtcagatatgttatgcataatctttgaagagcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgagg
+>IGHV3-30-42*01
+gaggtggagctgatagagcccacagaggacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttcagtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgatggaagtcagatacaccatgcagactctgtgaagggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt
+>IGHV3-30-5*01
+caggtgcagctggtggagtctgggggaggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttcagtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgatggaagtaataaatactatgcagactccgtgaagggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
+>IGHV3-30-52*01
+gaggtacagctcgtggagtccggagaggacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttcagtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgatggaagtcagatatgttatgcataatctttgaagagcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgagg
+>IGHV3-32*01
+gaggtggagctgatagagtccatagaggacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttcagtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgatggaagtcagatacaccatgcagactctgtgaagggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacactcagagagctgaggacgtggccgtgtatggctatacataaggtc
+>IGHV3-33*01
+caggtgcagctggtggagtctgggggaggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttcagtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgatggaagtaataaatactatgcagactccgtgaagggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-33-2*01
+gaggtacagctcgtggagtccggagaggacccaagacaacctgggggatccttgagactctcctgtgcagactctggattaaccttcagtagctactgaatgagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgatggaagtcagatatgttatgcccaatctgtgaagagcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgcaaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgaggca
+>IGHV3-35*01
+gaggtgcagctggtggagtctgggggaggcttggtacagcctgggggatccctgagactctcctgtgcagcctctggattcaccttcagtaacagtgacatgaactgggtccatcaggctccaggaaaggggctggagtgggtatcgggtgttagttggaatggcagtaggacgcactatgcagactctgtgaagggccgattcatcatctccagagacaattccaggaacaccctgtatctgcaaacgaatagcctgagggccgaggacacggctgtgtattactgtgtgagaaa
+>IGHV3-38*01
+gaggtgcagctggtggagtctgggggaggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtcagtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggtggtagcacatactacgcagactccaggaagggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata
+>IGHV3-38-3*01
+gaggtgcagctggtggagtctcggggagtcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtcagtagcaatgagatgagctgggtccgccaggctccagggaagggtctggagtgggtctcatccattagtggtggtagcacatactacgcagactccaggaagggcagattcaccatctccagagacaattccaagaacacgctgcatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtaagaaaga
+>IGHV3-43*01
+gaagtgcagctggtggagtctgggggagtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcacctttgatgattataccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggatggtggtagcacatactatgcagactctgtgaagggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata
+>IGHV3-43D*01
+gaagtgcagctggtggagtctgggggagtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcacctttgatgattatgccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggatggtggtagcacctactatgcagactctgtgaagggtcgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagagctgaggacaccgccttgtattactgtgcaaaagata
+>IGHV3-47*01
+gaggatcagctggtggagtctgggggaggcttggtacagcctggggggtccctgcgaccctcctgtgcagcctctggattcgccttcagtagctatgctctgcactgggttcgccgggctccagggaagggtctggagtgggtatcagctattggtactggtggtgatacatactatgcagactccgtgatgggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcatatgaacagcctgatagctgaggacatggctgtgtattattgtgcaagaga
+>IGHV3-48*01
+gaggtgcagctggtggagtctgggggaggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttcagtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagtagtagtaccatatactacgcagactctgtgaagggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-49*01
+gaggtgcagctggtggagtctgggggaggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcacctttggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaaggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
+>IGHV3-52*01
+gaggtgcagctggtggagtctgggtgaggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttcagtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgacggaagtgagaaatactatgtagactctgtgaagggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgagagg
+>IGHV3-53*01
+gaggtgcagctggtggagtctggaggaggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtcagtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggtggtagcacatactacgcagactccgtgaagggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
+>IGHV3-54*01
+gaggtacagctggtggagtctgaagaaaaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttcagtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtaggatagaagtcagctatgttatgcacaatctgtgaagagcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt
+>IGHV3-62*01
+gaggtgcagctggtggagtctggggaaggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttcagtagctctgctatgcactgggtccgccaggctccaagaaagggtttgtagtgggtctcagttattagtacaagtggtgataccgtactctacacagactctgtgaagggccgattcaccatctccagagacaatgcccagaattcactgtctctgcaaatgaacagcctgagagccgagggcacagttgtgtactactgtgtgaaaga
+>IGHV3-63*01
+gaggtggagctgatagagtccatagagggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttcagtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgatggaagtcagatataccatgcagactctgtgaagggcagattcaccatctccaaagacaatgctaagaactcaccgtatctccaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataaggtt
+>IGHV3-64*01
+gaggtgcagctggtggagtctgggggaggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttcagtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaatgggggtagcacatactacgcagactcagtgaagggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgagcagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-64D*01
+gaggtgcagctggtggagtctgggggaggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttcagtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaatgggggtagcacatactacgcagactccgtgaagggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga
+>IGHV3-66*01
+gaggtgcagctggtggagtctgggggaggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtcagtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggtggtagcacatactacgcagactccgtgaagggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-69-1*01
+gaggtgcagctggtggagtctgggggaggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttcagtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagtagtaccatatactacgcagactctgtgaagggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-7*01
+gaggtgcagctggtggagtctgggggaggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcacctttagtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagatggaagtgagaaatactatgtggactctgtgaagggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-71*01
+gaggtgcagctggtggagtctgggggaggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttcagtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaaggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-72*01
+gaggtgcagctggtggagtctgggggaggcttggtccagcctggagggtccctgagactctcctgtgcagcctctggattcaccttcagtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagttacaccacagaatacgccgcgtctgtgaaaggcagattcaccatctcaagagatgattcaaagaactcactgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtgctagaga
+>IGHV3-73*01
+gaggtgcagctggtggagtctgggggaggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttcagtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaaggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca
+>IGHV3-74*01
+gaggtgcagctggtggagtccgggggaggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttcagtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgatgggagtagcacaagctacgcggactccgtgaagggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga
+>IGHV3-9*01
+gaagtgcagctggtggagtctgggggaggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcacctttgatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaatagtggtagcataggctatgcggactctgtgaagggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata
+>IGHV3-NL1*01
+caggtgcagctggtggagtctgggggaggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttcagtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtctcagttatttatagcggtggtagtagcacatactatgcagactccgtgaagggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
+>IGHV4-28*01
+caggtgcagctgcaggagtcgggcccaggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagcagtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagtgggagcacctactacaacccgtccctcaagagtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
+>IGHV4-30-2*01
+cagctgcagctgcaggagtccggctcaggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagcagtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagtgggagcacctactacaacccgtccctcaagagtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
+>IGHV4-30-4*01
+caggtgcagctgcaggagtcgggcccaggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagcagtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagtgggagcacctactacaacccgtccctcaagagtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
+>IGHV4-31*01
+caggtgcagctgcaggagtcgggcccaggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagcagtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagtgggagcacctactacaacccgtccctcaagagtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-34*01
+caggtgcagctacagcagtggggcgcaggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttcagtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagtggaagcaccaactacaacccgtccctcaagagtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
+>IGHV4-38-2*01
+caggtgcagctgcaggagtcgggcccaggactggtgaagccttcggagaccctgtccctcacctgcgctgtctctggttactccatcagcagtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagtgggagcacctactacaacccgtccctcaagagtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
+>IGHV4-39*01
+cagctgcagctgcaggagtcgggcccaggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagcagtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagtgggagcacctactacaacccgtccctcaagagtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaga
+>IGHV4-4*01
+caggtgcagctgcaggagtcgggcccaggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagcagtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagtgggagcaccaactacaacccgtccctcaagagtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-55*01
+caggtgcagctgcaggagtcgggcccaggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagcagtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagtgggagcacctactacaacccgtccctcaagagtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
+>IGHV4-59*01
+caggtgcagctgcaggagtcgggcccaggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagtgggagcaccaactacaacccctccctcaagagtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-61*01
+caggtgcagctgcaggagtcgggcccaggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagcagtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagtgggagcaccaactacaacccctccctcaagagtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
+>IGHV5-10-1*01
+gaagtgcagctggtgcagtctggagcagaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagctttaccagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagtgactcttataccaactacagcccgtccttccaaggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca
+>IGHV5-51*01
+gaggtgcagctggtgcagtctggagcagaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagctttaccagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggtgactctgataccagatacagcccgtccttccaaggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca
+>IGHV5-78*01
+gaggtgcagctgttgcagtctgcagcagaggtgaaaagacccggggagtctctgaggatctcctgtaagacttctggatacagctttaccagctactggatccactgggtgcgccagatgcccgggaaagaactggagtggatggggagcatctatcctgggaactctgataccagatacagcccatccttccaaggccacgtcaccatctcagccgacagctccagcagcaccgcctacctgcagtggagcagcctgaaggcctcggacgccgccatgtattattgtgtgaga
+>IGHV6-1*01
+caggtacagctgcagcagtcaggtccaggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctctagcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtccaagtggtataatgattatgcagtatctgtgaaaagtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga
+>IGHV7-34-1*01
+ctgcagctggtgcagtctgggcctgaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttcaccatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctacactgggaacccaacgtatacccacggcttcacaggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta
+>IGHV7-4-1*01
+caggtgcagctggtgcaatctgggtctgagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttcactagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaacactgggaacccaacgtatgcccagggcttcacaggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga
+>IGHV7-40*01
+ttttcaatagaaaagtcaaataatctaagtgtcaatcagtggatgattagataaaatatgatatatgtaaatcatggaatactatgcagccagtatggtatgaattcagtgtgaccagcccctggacaagggcttgagtggatgggatggatcatcacctacactgggaacccaacatataccaacggcttcacaggacggtttctattctccatggacacctctgtcagcatggcgtatctgcagatcagcagcctaaaggctgaggacacggccgtgtatgactgtatgagaga
+>IGHV7-81*01
+caggtgcagctggtgcagtctggccatgaggtgaagcagcctggggcctcagtgaaggtctcctgcaaggcttctggttacagtttcaccacctatggtatgaattgggtgccacaggcccctggacaagggcttgagtggatgggatggttcaacacctacactgggaacccaacatatgcccagggcttcacaggacggtttgtcttctccatggacacctctgccagcacagcatacctgcagatcagcagcctaaaggctgaggacatggccatgtattactgtgcgagata
diff --git a/data/germline/human/IG_antevolo/IGKJ.fa b/data/germline/human/IG_antevolo/IGKJ.fa
new file mode 100644
index 00000000..42309e92
--- /dev/null
+++ b/data/germline/human/IG_antevolo/IGKJ.fa
@@ -0,0 +1,10 @@
+>IGKJ1*01
+gtggacgttcggccaagggaccaaggtggaaatcaaac
+>IGKJ2*01
+tgtgcacttttggccaggggaccaagctggagatcaaac
+>IGKJ3*01
+attcactttcggccctgggaccaaagtggatatcaaac
+>IGKJ4*01
+gctcactttcggcggagggaccaaggtggagatcaaac
+>IGKJ5*01
+gatcaccttcggccaagggacacgactggagattaaac
diff --git a/data/germline/human/IG_antevolo/IGKV.fa b/data/germline/human/IG_antevolo/IGKV.fa
new file mode 100644
index 00000000..88b1622a
--- /dev/null
+++ b/data/germline/human/IG_antevolo/IGKV.fa
@@ -0,0 +1,144 @@
+>IGKV1-12*01
+gacatccagatgacccagtctccatcttccgtgtctgcatctgtaggagacagagtcaccatcacttgtcgggcgagtcagggtattagcagctggttagcctggtatcagcagaaaccagggaaagcccctaagctcctgatctatgctgcatccagtttgcaaagtggggtcccatcaaggttcagcggcagtggatctgggacagatttcactctcaccatcagcagcctgcagcctgaagattttgcaacttactattgtcaacaggctaacagtttccctcc
+>IGKV1-13*01
+gccatccagttgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggcaagtcagggcattagcagtgctttagcctgatatcagcagaaaccagggaaagctcctaagctcctgatctatgatgcctccagtttggaaagtggggtcccatcaaggttcagcggcagtggatctgggacagatttcactctcaccatcagcagcctgcagcctgaagattttgcaacttattactgtcaacagtttaataattaccctca
+>IGKV1-16*01
+gacatccagatgacccagtctccatcctcactgtctgcatctgtaggagacagagtcaccatcacttgtcgggcgagtcagggcattagcaattatttagcctggtttcagcagaaaccagggaaagcccctaagtccctgatctatgctgcatccagtttgcaaagtggggtcccatcaaggttcagcggcagtggatctgggacagatttcactctcaccatcagcagcctgcagcctgaagattttgcaacttattactgccaacagtataatagttaccctcc
+>IGKV1-17*01
+gacatccagatgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggcaagtcagggcattagaaatgatttaggctggtatcagcagaaaccagggaaagcccctaagcgcctgatctatgctgcatccagtttgcaaagtggggtcccatcaaggttcagcggcagtggatctgggacagaattcactctcacaatcagcagcctgcagcctgaagattttgcaacttattactgtctacagcataatagttaccctcc
+>IGKV1-27*01
+gacatccagatgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggcgagtcagggcattagcaattatttagcctggtatcagcagaaaccagggaaagttcctaagctcctgatctatgctgcatccactttgcaatcaggggtcccatctcggttcagtggcagtggatctgggacagatttcactctcaccatcagcagcctgcagcctgaagatgttgcaacttattactgtcaaaagtataacagtgcccctcc
+>IGKV1-33*01
+gacatccagatgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccaggcgagtcaggacattagcaactatttaaattggtatcagcagaaaccagggaaagcccctaagctcctgatctacgatgcatccaatttggaaacaggggtcccatcaaggttcagtggaagtggatctgggacagattttactttcaccatcagcagcctgcagcctgaagatattgcaacatattactgtcaacagtatgataatctccctcc
+>IGKV1-37*01
+gacatccagttgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggtgagtcagggcattagcagttatttaaattggtatcggcagaaaccagggaaagttcctaagctcctgatctatagtgcatccaatttgcaatctggagtcccatctcggttcagtggcagtggatctgggacagatttcactctcactatcagcagcctgcagcctgaagatgttgcaacttattacggtcaacggacttacaatgcccctcc
+>IGKV1-39*01
+gacatccagatgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggcaagtcagagcattagcagctatttaaattggtatcagcagaaaccagggaaagcccctaagctcctgatctatgctgcatccagtttgcaaagtggggtcccatcaaggttcagtggcagtggatctgggacagatttcactctcaccatcagcagtctgcaacctgaagattttgcaacttactactgtcaacagagttacagtacccctcc
+>IGKV1-5*01
+gacatccagatgacccagtctccttccaccctgtctgcatctgtaggagacagagtcaccatcacttgccgggccagtcagagtattagtagctggttggcctggtatcagcagaaaccagggaaagcccctaagctcctgatctatgatgcctccagtttggaaagtggggtcccatcaaggttcagcggcagtggatctgggacagaattcactctcaccatcagcagcctgcagcctgatgattttgcaacttattactgccaacagtataatagttattctcc
+>IGKV1-6*01
+gccatccagatgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggcaagtcagggcattagaaatgatttaggctggtatcagcagaaaccagggaaagcccctaagctcctgatctatgctgcatccagtttacaaagtggggtcccatcaaggttcagcggcagtggatctggcacagatttcactctcaccatcagcagcctgcagcctgaagattttgcaacttattactgtctacaagattacaattaccctcc
+>IGKV1-8*01
+gccatccggatgacccagtctccatcctcattctctgcatctacaggagacagagtcaccatcacttgtcgggcgagtcagggtattagcagttatttagcctggtatcagcaaaaaccagggaaagcccctaagctcctgatctatgctgcatccactttgcaaagtggggtcccatcaaggttcagcggcagtggatctgggacagatttcactctcaccatcagctgcctgcagtctgaagattttgcaacttattactgtcaacagtattatagttaccctcc
+>IGKV1-9*01
+gacatccagttgacccagtctccatccttcctgtctgcatctgtaggagacagagtcaccatcacttgccgggccagtcagggcattagcagttatttagcctggtatcagcaaaaaccagggaaagcccctaagctcctgatctatgctgcatccactttgcaaagtggggtcccatcaaggttcagcggcagtggatctgggacagaattcactctcacaatcagcagcctgcagcctgaagattttgcaacttattactgtcaacagcttaatagttaccctcc
+>IGKV1-NL1*01
+gacatccagatgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggcgagtcagggcattagcaattctttagcctggtatcagcagaaaccagggaaagcccctaagctcctgctctatgctgcatccagattggaaagtggggtcccatccaggttcagtggcagtggatctgggacggattacactctcaccatcagcagcctgcagcctgaagattttgcaacttattactgtcaacagtattatagtacccctcc
+>IGKV1/OR-2*01
+gacatccagatgacccagtctccatcctccctgtctgcatctgtaggaggcagagtcaccatcacttgccgggcgagtcagggcattagcaataatttaaattggtatcagcagaaaccaaggaaaactcctaagctcctgatctatgctgcatccagtctgcaaagtgggattccctctcggttcagtgacagtggatctgggacagattacactctcaccatcagcagcctgcagcctgaagattttgcaacttattactgtcaacagagtgacagtaaccctcc
+>IGKV1/OR-3*01
+gacatccagatgacccagtctccatcctccctgtctgcatctgtaggaggcagagtcaccatcacttgccgggcgagtcagggcattagcaataatttaaattggtatcagcagaaaccagggaaaactcctaagctcctgatctatgctgcatccagtctgcaaagtgggattccctctcggttcagtgacagtggatctgggacagattacactctcaccatcagcagcctgcagcctgaagattttgcagcttattactgtcaacagagtgacagtacccctcc
+>IGKV1/OR-4*01
+gacatccagatgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggcgagtcagggcattagcaataatttaaattggtatcagcagaaaccagggaaaactcctaagttcctgatctatgcagcatccagtctgcaaagtgggattccctctcggttcagtgacagtggatctgggacagattacactctcaccatcagcagcctgcagcctgaagattttgcaacttattactgtcaacagagtgacagtacccctcc
+>IGKV1/OR1-1*01
+gacatccagatgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggcgagtcagggcattagcaataatttaaattggtatcagcagaaaccagggaaaactcctaagctcctgatctatgctgcatccagtctgcaaagtgggattccctctcggttcagtgacagtggatctggggcagactacactctcaccatccgcagcctgcagcctgaagattttgcaacttattactgtcaacagagtgacagtacccctcc
+>IGKV1/OR10-1*01
+gacatccagatgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggcgagtcagggcattagcaataatttaaattggtatcagcagaaaccagggaaaactcctaagctcctgatctatgctgcatccagtctgcaaagtgggattccctctcggttcagtgacagtggatctgggacagattacactctcaccatcagcagcctgcagcctgaagattttgcaacttattactgtcaacagagtgacagtacctctcc
+>IGKV1/OR15-118*01
+gacatccagatgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggcgagtcagggcattagcaataatttaaattggtatcagcagaaaccagggaaaactcctaagctcctgatctatgctgcacccagtctgcaaagtgggattccctctcggttcagtgacagtggatctggggcagattacactctcaccatccgcagcctgcagcctgaagattttgcaacttattagtgtcaacagagtgacagtacccctcc
+>IGKV1/OR2-0*01
+gacatccagatgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggcgagtcagggcattagcaataatttaaattggtatcagcagaaaccagggaaaactcctaagctcctgatctatgctgcacccagtctgcaaagtgggattccctctcggttcagtgacagtggatctggggcagattacactctcaccatccgcagcctgcagcctgaagattttgcaacttattactgtcaacagagtgacagtacccctcc
+>IGKV1/OR2-1*01
+gacatccagatgacccagtctccatcctccctgtctgcatctgtaggaggcagagtcaccatcacttgccgggcgagtcagggcattagcaataatttaaattggtatcagcagaaaccagggaaaactcctaagctcctgatctatgctgcatccagtctgcaaagtgggattccctctcggttcagtgacagtggatctggggcagattacactctcaccatcagcagcctgcagcctgaagattttgcagcttattactgtcaacagagtgacagtacccctcc
+>IGKV1/OR2-108*01
+gacatccaggtgacccagtctccatcttccctgtctgcgtctgtaggagacagagtcaccatcacctgccgggcaagtcagggcattagcaatgggttatcctggtatcagcagaaaccagggcaagcccctacgctcctgatctatgctgcatccagtttgcagtcgggggtcccatctcggttcagtggcagtggatctgggacagatttcactctcaccatcagcagcctgcagcctgaagatgttgcaacttattactgtctacaggattatactaccccatt
+>IGKV1/OR2-11*01
+gacatccagatgactcagcctccatcctccctgtctgcatctgtaggagacagagccaccgtctcttgccaggctagtcaaagcatttacaactatttaaattggtatcagcagaaaccagggaaagcacctaagttcctgacctatagggcatccagtttgcagagggcgatgccatctcagttcagtggcagcggatatggaagagatttcactctcaccgtcagcagcctgcagcctgaagattttgcaacttattaatgtcaacaagagagcattttccctcc
+>IGKV1/OR2-118*01
+gacatccagatgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggcgagtcagggcattagcaataatttaaattggtatcagcagaaaccagggaaaactcctaagctcctgatctatgctgcatccagtctgcaaagtgggattccctctcggttcagtgacagtggatctggggcagattacactctcaccatccgcagcctgcagcctgaagattttgcaaattattactgtcaacagagtgacagtacccctcc
+>IGKV1/OR2-2*01
+gacatccagatgacccagtctccatcctccctgtctgcatctgtaggaggcagagtcaccatcacttgccgggcgagtcagggcattagcaataatttaaattggtatcagcagaaaccagggaaaactcctaagctcctgatctatgctgcatccagtctgcaaagtgggattccctctcggttcagtgacagtggatctggggcagattacactctcaccatcagcagcctgcagcctgaagattttgcagcttattactgtcaacagagtgacagtacccctcc
+>IGKV1/OR2-3*01
+gacatccagatgacccagcctccatcctccctgtctgcatctgtaggagacagagtcaccgtctcttgccaggctagtcaaagcatttacaactatttaaattggtatcagcagaaaccagggaaagcacctaagttcctgacctatagggcatccagtttgcagagggggatgccatctcagttcagtggcagcggatatggaagagatttcactctcactgtcagcagcctgcagcctgaagattttgcaacttattaatgtcaacaagagagcattttccctct
+>IGKV1/OR2-9*01
+gacatccagatgactcagcctccatcctccctgtctgcatctgtaggagacagagccaccgtctcttgccaggctagtcaaagcatttacaactatttaaattggtatcagcagaaaccagggaaagcacctaagttcctgacctatagggcatccagtttgcagagggcgatgccatctcagttcagtggcagcggatatggaagagatttcactctcaccgtcagcagcctgcagcctgaagattttgcaacttattaatgtcaacaagagagcattttccctcc
+>IGKV1/OR22-5*01
+gacatccagatgacccagtctccatcctccctgtctgcatctgtaggaggcagagtcaccatcacttgccgggcgagtcagggcattagcaataatttaaattggtatcagcagaaaccagggaaaactcctaagcccctgatctatgctgcatccagtctgcaaagtgggattccctctcagttcagtgacagtggatctgggacagattagactctcaccatcagcagcctgcagcctgaagattttgcaacttattactgtcaacagagttacagtacccctcc
+>IGKV1/OR9-1*01
+gacatccagatgacccagtctccatcctccctgtctgcatctgtaggaggcagagtcaccatcacttgccgggtgagtcagggcattagcaataatttaaattggtatcagcagaaaccaaggaaaactcctaagctcctgatctatgctgcatccagtctgcaaagtgggattccctctcggttcagtgacagtggatctgggacagattacactctcaccatcagcagcctgcagcctgaagattttgcaacctattactgtcaacagagtgacagtaaccctcc
+>IGKV1/OR9-2*01
+gacatccagatgacccagtctccatcctccctgtctgcatctgtaggaggcagagtcaccatcacttgccgggcgagtcagggcattagcaataatttaaattggtatcagcagaaaccaaggaaaactcctaagctcctgatctatgctgcatccagtctgcaaagtgggattccctctcggttcagtgacagtggatctgggacagattacactctcaccatcagcagcctgcagcctgaagattttgcaacctattactgtcaacagagtgacagtaaccctcc
+>IGKV1/ORY-1*01
+gacatccagatgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggcgagtcagggcattatcaataatttaaattggtatcagaagaaaccagggaaaactcctaagctcctgatctatgctgcatccagtctgcaaagtgggattcccactcggttcagtgacagtggatctgggacagattacactcccaccatcagcagcctgcagcctgaagattttgcaacttactactgtcaacagagtgacagtacccctcc
+>IGKV1D-12*01
+gacatccagatgacccagtctccatcttctgtgtctgcatctgtaggagacagagtcaccatcacttgtcgggcgagtcagggtattagcagctggttagcctggtatcagcagaaaccagggaaagcccctaagctcctgatctatgctgcatccagtttgcaaagtggggtcccatcaaggttcagcggcagtggatctgggacagatttcactctcactatcagcagcctgcagcctgaagattttgcaacttactattgtcaacaggctaacagtttccctcc
+>IGKV1D-13*01
+gccatccagttgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggcaagtcagggcattagcagtgctttagcctggtatcagcagaaaccagggaaagctcctaagctcctgatctatgatgcctccagtttggaaagtggggtcccatcaaggttcagcggcagtggatctgggacagatttcactctcaccatcagcagcctgcagcctgaagattttgcaacttattactgtcaacagtttaataattaccctca
+>IGKV1D-16*01
+gacatccagatgacccagtctccatcctcactgtctgcatctgtaggagacagagtcaccatcacttgtcgggcgagtcagggtattagcagctggttagcctggtatcagcagaaaccagagaaagcccctaagtccctgatctatgctgcatccagtttgcaaagtggggtcccatcaaggttcagcggcagtggatctgggacagatttcactctcaccatcagcagcctgcagcctgaagattttgcaacttattactgccaacagtataatagttaccctcc
+>IGKV1D-17*01
+aacatccagatgacccagtctccatctgccatgtctgcatctgtaggagacagagtcaccatcacttgtcgggcgaggcagggcattagcaattatttagcctggtttcagcagaaaccagggaaagtccctaagcacctgatctatgctgcatccagtttgcaaagtggggtcccatcaaggttcagcggcagtggatctgggacagaattcactctcacaatcagcagcctgcagcctgaagattttgcaacttattactgtctacagcataatagttaccctcc
+>IGKV1D-33*01
+gacatccagatgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccaggcgagtcaggacattagcaactatttaaattggtatcagcagaaaccagggaaagcccctaagctcctgatctacgatgcatccaatttggaaacaggggtcccatcaaggttcagtggaagtggatctgggacagattttactttcaccatcagcagcctgcagcctgaagatattgcaacatattactgtcaacagtatgataatctccctcc
+>IGKV1D-37*01
+gacatccagttgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggtgagtcagggcattagcagttatttaaattggtatcggcagaaaccagggaaagttcctaagctcctgatctatagtgcatccaatttgcaatctggagtcccatctcggttcagtggcagtggatctgggacagatttcactctcactatcagcagcctgcagcctgaagatgttgcaacttattacggtcaacggacttacaatgcccctcc
+>IGKV1D-39*01
+gacatccagatgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggcaagtcagagcattagcagctatttaaattggtatcagcagaaaccagggaaagcccctaagctcctgatctatgctgcatccagtttgcaaagtggggtcccatcaaggttcagtggcagtggatctgggacagatttcactctcaccatcagcagtctgcaacctgaagattttgcaacttactactgtcaacagagttacagtacccctcc
+>IGKV1D-42*01
+gacatccagatgatccagtctccatctttcctgtctgcatctgtaggagacagagtcagtatcatttgctgggcaagtgagggcattagcagtaatttagcctggtatctgcagaaaccagggaaatcccctaagctcttcctctatgatgcaaaagatttgcaccctggggtctcatcgaggttcagtggcaggggatctgggacggatttcactctcaccatcatcagcctgaagcctgaagattttgcagcttattactgtaaacaggacttcagttaccctcc
+>IGKV1D-43*01
+gccatccggatgacccagtctccattctccctgtctgcatctgtaggagacagagtcaccatcacttgctgggccagtcagggcattagcagttatttagcctggtatcagcaaaaaccagcaaaagcccctaagctcttcatctattatgcatccagtttgcaaagtggggtcccatcaaggttcagcggcagtggatctgggacggattacactctcaccatcagcagcctgcagcctgaagattttgcaacttattactgtcaacagtattatagtacccctcc
+>IGKV1D-8*01
+gtcatctggatgacccagtctccatccttactctctgcatctacaggagacagagtcaccatcagttgtcggatgagtcagggcattagcagttatttagcctggtatcagcaaaaaccagggaaagcccctgagctcctgatctatgctgcatccactttgcaaagtggggtcccatcaaggttcagtggcagtggatctgggacagatttcactctcaccatcagctgcctgcagtctgaagattttgcaacttattactgtcaacagtattatagtttccctcc
+>IGKV2-18*01
+gatattgtgatgacccagactccaccctccctgcccgtcaaccctggagagccggcctccatctcttgcaggtctagtcagagcctcctgcatagtaatggatatacctatttgcattggtacctgcagaagccagggcagtctccacagctcctgatttatagggtttccaatcatctttctggggtcccagacaggtttagtggcagtgggtcaggtagtgatttcacactgaaaatcagctgggtggaggctgaggatgttggggtttattactgcatgcaagctacacagtttcctaa
+>IGKV2-24*01
+gatattgtgatgacccagactccactctcctcacctgtcacccttggacagccggcctccatctcctgcaggtctagtcaaagcctcgtacacagtgatggaaacacctacttgagttggcttcagcagaggccaggccagcctccaagactcctaatttataagatttctaaccggttctctggggtcccagacagattcagtggcagtggggcagggacagatttcacactgaaaatcagcagggtggaagctgaggatgtcggggtttattactgcatgcaagctacacaatttcctca
+>IGKV2-28*01
+gatattgtgatgactcagtctccactctccctgcccgtcacccctggagagccggcctccatctcctgcaggtctagtcagagcctcctgcatagtaatggatacaactatttggattggtacctgcagaagccagggcagtctccacagctcctgatctatttgggttctaatcgggcctccggggtccctgacaggttcagtggcagtggatcaggcacagattttacactgaaaatcagcagagtggaggctgaggatgttggggtttattactgcatgcaagctctacaaactcctcc
+>IGKV2-29*01
+gatattgtgatgacccagactccactctctctgtccgtcacccctggacagccggcctccatctcctgcaagtctagtcagagcctcctgcatagtgatggaaagacctatttgtattggtacctgcagaagccaggccagtctccacagctcctgatctatgaagtttccagccggttctctggagtgccagataggttcagtggcagcgggtcagggacagatttcacactgaaaatcagccgggtggaggctgaggatgttggggtttattactgcatgcaaggtatacaccttcctcc
+>IGKV2-30*01
+gatgttgtgatgactcagtctccactctccctgcccgtcacccttggacagccggcctccatctcctgcaggtctagtcaaagcctcgtatacagtgatggaaacacctacttgaattggtttcagcagaggccaggccaatctccaaggcgcctaatttataaggtttctaaccgggactctggggtcccagacagattcagcggcagtgggtcaggcactgatttcacactgaaaatcagcagggtggaggctgaggatgttggggtttattactgcatgcaaggtacacactggcctcc
+>IGKV2-4*01
+gatattgtgatgacccagcatctgctctccctgcccatccctctgggagagccggcctccatctcctgcaggtctagtcagagcctcctgcatagtgatggaaacacctatttggattggtacctgcagaagccaggccagtctccacagcttcttatctacacaatttctaacaaattctatggagtcccaaacaagttcagtggcagcaggtcagggacaggtttcacacttaaattcagcaaagtggaggctgaggatgttggggtttattgctgtgaacagggtctgcaaggtcctca
+>IGKV2-40*01
+gatattgtgatgacccagactccactctccctgcccgtcacccctggagagccggcctccatctcctgcaggtctagtcagagcctcttggatagtgatgatggaaacacctatttggactggtacctgcagaagccagggcagtctccacagctcctgatctatacgctttcctatcgggcctctggagtcccagacaggttcagtggcagtgggtcaggcactgatttcacactgaaaatcagcagggtggaggctgaggatgttggagtttattactgcatgcaacgtatagagtttccttc
+>IGKV2/OR2-7D*01
+gacattctgttgacccagactccactctccctgtccatcacccccggagagccggcctccatctcctgcaggtctagtcgcagcctcctgcatagtaatggaaacacctatttacattggtagctgcagaagccaggccagcctccacagtgtctaatctgcaaggtttctaaccggttttctggggtcccagacaggttcagtggcagtgggtcgggcattgatttcacactgaaaatcagcccggtggaggctgcggatgttggggtttatattactgcatgcaagctacacactggtcccc
+>IGKV2/OR22-4*01
+gacattgtgatgacccagactccactctccctgcctgtcactcctggagagccagcctccatctcctgcagatctagtgagagcctcttggatactgatgatgaatacacctatttgaattggtacctgcagaagccaggccagtctccacagctcctgatctatgaggtttccaaccgggcctctggagttccagacaggttcagtggcagtgggtcaggcactgatttcactctgaaaatcagtagggtggaggcttaggatgttggggtttattactgcatgcaagctctacaaactccgcc
+>IGKV2D-18*01
+gatattgtgatgacccagactccaccctccctgcccgtcaaccctggagagccggcctccatctcctgcaggtctagtcaaagcctcctgcatagtaatggatatacctatttgcattggtacccgcagaagccagggcaatctccacagctcctgatttatagggtttccagtcgtttttctggggtcccagacaggtttagtggcagtgggtcaggcagtgatttcacactgaaaatcagctgggtggaggctgaggatgttggggtttattactgcatgcaagctacacagtttcct
+>IGKV2D-24*01
+gatattgtgatgacccagactccactctcctcgcctgtcacccttggacagccggcctccatctccttcaggtctagtcaaagcctcgtacacagtgatggaaacacctacttgagttggcttcagcagaggccaggccagcctccaagactcctaatttataaggtttctaaccggttctctggggtcccagacagattcagtggcagtggggcagggacagatttcacactgaaaatcagcagggtggaagctgaggatgtcggggtttattactgcacgcaagctacacaatttcctca
+>IGKV2D-26*01
+gagattgtgatgacccagactccactctccttgtctatcacccctggagagcaggcctccatgtcctgcaggtctagtcagagcctcctgcatagtgatggatacacctatttgtattggtttctgcagaaagccaggccagtctccacgctcctgatctatgaagtttccaaccggttctctggagtgccagataggttcagtggcagcgggtcagggacagatttcacactgaaaatcagccgggtggaggctgaggattttggagtttattactgcatgcaagatgcacaagatcctcc
+>IGKV2D-28*01
+gatattgtgatgactcagtctccactctccctgcccgtcacccctggagagccggcctccatctcctgcaggtctagtcagagcctcctgcatagtaatggatacaactatttggattggtacctgcagaagccagggcagtctccacagctcctgatctatttgggttctaatcgggcctccggggtccctgacaggttcagtggcagtggatcaggcacagattttacactgaaaatcagcagagtggaggctgaggatgttggggtttattactgcatgcaagctctacaaactcctcc
+>IGKV2D-29*01
+gatattgtgatgacccagactccactctctctgtccgtcacccctggacagccggcctccatctcctgcaagtctagtcagagcctcctgcatagtgatggaaagacctatttgtattggtacctgcagaagccaggccagcctccacagctcctgatctatgaagtttccaaccggttctctggagtgccagataggttcagtggcagcgggtcagggacagatttcacactgaaaatcagccgggtggaggctgaggatgttggggtttattactgcatgcaaagtatacagcttcctcc
+>IGKV2D-30*01
+gatgttgtgatgactcagtctccactctccctgcccgtcacccttggacagccggcctccatctcctgcaggtctagtcaaagcctcgtatacagtgatggaaacacctacttgaattggtttcagcagaggccaggccaatctccaaggcgcctaatttataaggtttctaactgggactctggggtcccagacagattcagcggcagtgggtcaggcactgatttcacactgaaaatcagcagggtggaggctgaggatgttggggtttattactgcatgcaaggtacacactggcctcc
+>IGKV2D-40*01
+gatattgtgatgacccagactccactctccctgcccgtcacccctggagagccggcctccatctcctgcaggtctagtcagagcctcttggatagtgatgatggaaacacctatttggactggtacctgcagaagccagggcagtctccacagctcctgatctatacgctttcctatcgggcctctggagtcccagacaggttcagtggcagtgggtcaggcactgatttcacactgaaaatcagcagggtggaggctgaggatgttggagtttattactgcatgcaacgtatagagtttccttc
+>IGKV3-11*01
+gaaattgtgttgacacagtctccagccaccctgtctttgtctccaggggaaagagccaccctctcctgcagggccagtcagagtgttagcagctacttagcctggtaccaacagaaacctggccaggctcccaggctcctcatctatgatgcatccaacagggccactggcatcccagccaggttcagtggcagtgggtctgggacagacttcactctcaccatcagcagcctagagcctgaagattttgcagtttattactgtcagcagcgtagcaactggcctcc
+>IGKV3-15*01
+gaaatagtgatgacgcagtctccagccaccctgtctgtgtctccaggggaaagagccaccctctcctgcagggccagtcagagtgttagcagcaacttagcctggtaccagcagaaacctggccaggctcccaggctcctcatctatggtgcatccaccagggccactggtatcccagccaggttcagtggcagtgggtctgggacagagttcactctcaccatcagcagcctgcagtctgaagattttgcagtttattactgtcagcagtataataactggcctcc
+>IGKV3-20*01
+gaaattgtgttgacgcagtctccaggcaccctgtctttgtctccaggggaaagagccaccctctcctgcagggccagtcagagtgttagcagcagctacttagcctggtaccagcagaaacctggccaggctcccaggctcctcatctatggtgcatccagcagggccactggcatcccagacaggttcagtggcagtgggtctgggacagacttcactctcaccatcagcagactggagcctgaagattttgcagtgtattactgtcagcagtatggtagctcacctcc
+>IGKV3-7*01
+gaaattgtaatgacacagtctccacccaccctgtctttgtctccaggggaaagagtcaccctctcctgcagggccagtcagagtgttagcagcagctacttaacctggtatcagcagaaacctggccaggcgcccaggctcctcatctatggtgcatccaccagggccactagcatcccagccaggttcagtggcagtgggtctgggacagacttcactctcaccatcagcagcctgcagcctgaagattttgcagtttattactgtcagcaggattataacttacctcc
+>IGKV3/OR2-268*01
+gaaattgtaatgacacagtctccagccaccctgtctttgtctccaggggaaagagccaccctctcctgcagggccagtcagagtgttagcagcagctacttatcctggtaccagcagaaacctgggcaggctcccaggctcctcatctatggtgcatccaccagggccactggcatcccagccaggttcagtggtagtgggtctgggacagacttcactctcaccatcagcagcctgcagcctgaagattttgcagtttattactgtcagcaggattataacttacctcc
+>IGKV3D-11*01
+gaaattgtgttgacacagtctccagccaccctgtctttgtctccaggggaaagagccaccctctcctgcagggccagtcagggtgttagcagctacttagcctggtaccagcagaaacctggccaggctcccaggctcctcatctatgatgcatccaacagggccactggcatcccagccaggttcagtggcagtgggcctgggacagacttcactctcaccatcagcagcctagagcctgaagattttgcagtttattactgtcagcagcgtagcaactggcatcc
+>IGKV3D-15*01
+gaaatagtgatgacgcagtctccagccaccctgtctgtgtctccaggggaaagagccaccctctcctgcagggccagtcagagtgttagcagcaacttagcctggtaccagcagaaacctggccaggctcccaggctcctcatctatggtgcatccaccagggccactggcatcccagccaggttcagtggcagtgggtctgggacagagttcactctcaccatcagcagcctgcagtctgaagattttgcagtttattactgtcagcagtataataactggcctcc
+>IGKV3D-20*01
+gaaattgtgttgacgcagtctccagccaccctgtctttgtctccaggggaaagagccaccctctcctgcggggccagtcagagtgttagcagcagctacttagcctggtaccagcagaaacctggcctggcgcccaggctcctcatctatgatgcatccagcagggccactggcatcccagacaggttcagtggcagtgggtctgggacagacttcactctcaccatcagcagactggagcctgaagattttgcagtgtattactgtcagcagtatggtagctcacctcc
+>IGKV3D-7*01
+gaaattgtaatgacacagtctccagccaccctgtctttgtctccaggggaaagagccaccctctcctgcagggccagtcagagtgttagcagcagctacttatcctggtaccagcagaaacctgggcaggctcccaggctcctcatctatggtgcatccaccagggccactggcatcccagccaggttcagtggcagtgggtctgggacagacttcactctcaccatcagcagcctgcagcctgaagattttgcagtttattactgtcagcaggattataacttacctcc
+>IGKV4-1*01
+gacatcgtgatgacccagtctccagactccctggctgtgtctctgggcgagagggccaccatcaactgcaagtccagccagagtgttttatacagctccaacaataagaactacttagcttggtaccagcagaaaccaggacagcctcctaagctgctcatttactgggcatctacccgggaatccggggtccctgaccgattcagtggcagcgggtctgggacagatttcactctcaccatcagcagcctgcaggctgaagatgtggcagtttattactgtcagcaatattatagtactcctcc
+>IGKV5-2*01
+gaaacgacactcacgcagtctccagcattcatgtcagcgactccaggagacaaagtcaacatctcctgcaaagccagccaagacattgatgatgatatgaactggtaccaacagaaaccaggagaagctgctattttcattattcaagaagctactactctcgttcctggaatcccacctcgattcagtggcagcgggtatggaacagattttaccctcacaattaataacatagaatctgaggatgctgcatattacttctgtctacaacatgataatttccctct
+>IGKV6-21*01
+gaaattgtgctgactcagtctccagactttcagtctgtgactccaaaggagaaagtcaccatcacctgccgggccagtcagagcattggtagtagcttacactggtaccagcagaaaccagatcagtctccaaagctcctcatcaagtatgcttcccagtccttctcaggggtcccctcgaggttcagtggcagtggatctgggacagatttcaccctcaccatcaatagcctggaagctgaagatgctgcaacgtattactgtcatcagagtagtagtttacctca
+>IGKV6D-21*01
+gaaattgtgctgactcagtctccagactttcagtctgtgactccaaaggagaaagtcaccatcacctgccgggccagtcagagcattggtagtagcttacactggtaccagcagaaaccagatcagtctccaaagctcctcatcaagtatgcttcccagtccttctcaggggtcccctcgaggttcagtggcagtggatctgggacagatttcaccctcaccatcaatagcctggaagctgaagatgctgcaacgtattactgtcatcagagtagtagtttacctca
+>IGKV6D-41*01
+gatgttgtgatgacacagtctccagctttcctctctgtgactccaggggagaaagtcaccatcacctgccaggccagtgaaggcattggcaactacttatactggtaccagcagaaaccagatcaagccccaaagctcctcatcaagtatgcttcccagtccatctcaggggtcccctcgaggttcagtggcagtggatctgggacagatttcacctttaccatcagtagcctggaagctgaagatgctgcaacatattactgtcagcagggcaataagcaccctca
+>IGKV7-3*01
+gacattgtgctgacccagtctccagcctccttggccgtgtctccaggacagagggccaccatcacctgcagagccagtgagagtgtcagtttcttgggaataaacttaattcactggtatcagcagaaaccaggacaacctcctaaactcctgatttaccaagcatccaataaagacactggggtcccagccaggttcagcggcagtgggtctgggaccgatttcaccctcacaattaatcctgtggaagctaatgatactgcaaattattactgtctgcagagtaagaattttcctcc
diff --git a/data/germline/human/IG_antevolo/IGLJ.fa b/data/germline/human/IG_antevolo/IGLJ.fa
new file mode 100644
index 00000000..d74aed59
--- /dev/null
+++ b/data/germline/human/IG_antevolo/IGLJ.fa
@@ -0,0 +1,14 @@
+>IGLJ1*01
+ttatgtcttcggaactgggaccaaggtcaccgtcctag
+>IGLJ2*01
+tgtggtattcggcggagggaccaagctgaccgtcctag
+>IGLJ3*01
+tgtggtattcggcggagggaccaagctgaccgtcctag
+>IGLJ4*01
+ttttgtatttggtggaggaacccagctgatcattttag
+>IGLJ5*01
+ctgggtgtttggtgaggggaccgagctgaccgtcctag
+>IGLJ6*01
+taatgtgttcggcagtggcaccaaggtgaccgtcctcg
+>IGLJ7*01
+tgctgtgttcggaggaggcacccagctgaccgtcctcg
diff --git a/data/germline/human/IG_antevolo/IGLV.fa b/data/germline/human/IG_antevolo/IGLV.fa
new file mode 100644
index 00000000..210cb2e3
--- /dev/null
+++ b/data/germline/human/IG_antevolo/IGLV.fa
@@ -0,0 +1,92 @@
+>IGLV1-36*01
+cagtctgtgctgactcagccaccctcggtgtctgaagcccccaggcagagggtcaccatctcctgttctggaagcagctccaacatcggaaataatgctgtaaactggtaccagcagctcccaggaaaggctcccaaactcctcatctattatgatgatctgctgccctcaggggtctctgaccgattctctggctccaagtctggcacctcagcctccctggccatcagtgggctccagtctgaggatgaggctgattattactgtgcagcatgggatgacagcctgaatggtcc
+>IGLV1-40*01
+cagtctgtcgtgacgcagccgccctcagtgtctggggccccagggcagagggtcaccatctcctgcactgggagcagctccaacatcggggcaggttatgatgtacactggtaccagcagcttccaggaacagcccccaaactcctcatctatggtaacagcaatcggccctcaggggtccctgaccgattctctggctccaagtctggcacctcagcctccctggccatcactgggctccaggctgaggatgaggctgattattactgccagtcctatgacagcagcctgagtggttc
+>IGLV1-41*01
+cagtctgtgttgacgcagccgccttcagtgtctgcggccccaggacagaaggtcaccatctcctgctctggaagcagctccgacatggggaattatgcggtatcctggtaccagcagctcccaggaacagcccccaaactcctcatctatgaaaataataagcgaccctcagggattcctgaccgattctctggctccaagtctggcacctcagccaccctgggcatcactggcctctggcctgaggacgaggccgattattactgcttagcatgggataccagcccgagagcttg
+>IGLV1-44*01
+cagtctgtgctgactcagccaccctcagcgtctgggacccccgggcagagggtcaccatctcttgttctggaagcagctccaacatcggaagtaatactgtaaactggtaccagcagctcccaggaacggcccccaaactcctcatctatagtaataatcagcggccctcaggggtccctgaccgattctctggctccaagtctggcacctcagcctccctggccatcagtgggctccagtctgaggatgaggctgattattactgtgcagcatgggatgacagcctgaatggtcc
+>IGLV1-47*01
+cagtctgtgctgactcagccaccctcagcgtctgggacccccgggcagagggtcaccatctcttgttctggaagcagctccaacatcggaagtaattatgtatactggtaccagcagctcccaggaacggcccccaaactcctcatctataggaataatcagcggccctcaggggtccctgaccgattctctggctccaagtctggcacctcagcctccctggccatcagtgggctccggtccgaggatgaggctgattattactgtgcagcatgggatgacagcctgagtggtcc
+>IGLV1-50*01
+cagtctgtgctgacgcagccgccctcagtgtctggggccccagggcagagggtcaccatctcctgcactgggagcagctccaacattggggcgggttatgttgtacattggtaccagcagcttccaggaacagcccccaaactcctcatctatggtaacagcaatcggccctcaggggtccctgaccaattctctggctccaagtctggcacctcagcctccctggccatcactggactccagtctgaggatgaggctgattattactgcaaagcatgggataacagcctgaatgctca
+>IGLV1-51*01
+cagtctgtgttgacgcagccgccctcagtgtctgcggccccaggacagaaggtcaccatctcctgctctggaagcagctccaacattgggaataattatgtatcctggtaccagcagctcccaggaacagcccccaaactcctcatttatgacaataataagcgaccctcagggattcctgaccgattctctggctccaagtctggcacgtcagccaccctgggcatcaccggactccagactggggacgaggccgattattactgcggaacatgggatagcagcctgagtgctgg
+>IGLV1-62*01
+cagtctgtgctgactcagccaccctcagtgtcttgggccacaaggcagaggctcactgtctcctgcactggaagcagctccaacactgggactggctataacgtaaactgttggcagtagctcccaagaactgaccccaaactcctcaggcatggtgataagaattgggcctcctgggtatctgaccaattctctggttccaagtctggcagcttggcctccctgggcaccactgggctctgggctgaggacaagactgattatcactgccagtcccgtgacatctgctgagtgcttg
+>IGLV10-54*01
+caggcagggctgactcagccaccctcggtgtccaagggcttgagacagaccgccacactcacctgcactgggaacagcaacaatgttggcaaccaaggagcagcttggctgcagcagcaccagggccaccctcccaaactcctatcctacaggaataacaaccggccctcagggatctcagagagattatctgcatccaggtcaggaaacacagcctccctgaccattactggactccagcctgaggacgaggctgactattactgctcagcatgggacagcagcctcagtgctca
+>IGLV11-55*01
+cggcccgtgctgactcagccgccctctctgtctgcatccccgggagcaacagccagactcccctgcaccctgagcagtgacctcagtgttggtggtaaaaacatgttctggtaccagcagaagccagggagctctcccaggttattcctgtatcactactcagactcagacaagcagctgggacctggggtccccagtcgagtctctggctccaaggagacctcaagtaacacagcgtttttgctcatctctgggctccagcctgaggacgaggccgattattactgccaggtgtacgaaagtagtgctaat
+>IGLV2-11*01
+cagtctgccctgactcagcctcgctcagtgtccgggtctcctggacagtcagtcaccatctcctgcactggaaccagcagtgatgttggtggttataactatgtctcctggtaccaacagcacccaggcaaagcccccaaactcatgatttatgatgtcagtaagcggccctcaggggtccctgatcgcttctctggctccaagtctggcaacacggcctccctgaccatctctgggctccaggctgaggatgaggctgattattactgctgctcatatgcaggcagctacactttc
+>IGLV2-14*01
+cagtctgccctgactcagcctgcctccgtgtctgggtctcctggacagtcgatcaccatctcctgcactggaaccagcagtgacgttggtggttataactatgtctcctggtaccaacagcacccaggcaaagcccccaaactcatgatttatgaggtcagtaatcggccctcaggggtttctaatcgcttctctggctccaagtctggcaacacggcctccctgaccatctctgggctccaggctgaggacgaggctgattattactgcagctcatatacaagcagcagcactctc
+>IGLV2-18*01
+cagtctgccctgactcagcctccctccgtgtccgggtctcctggacagtcagtcaccatctcctgcactggaaccagcagtgacgttggtagttataaccgtgtctcctggtaccagcagcccccaggcacagcccccaaactcatgatttatgaggtcagtaatcggccctcaggggtccctgatcgcttctctgggtccaagtctggcaacacggcctccctgaccatctctgggctccaggctgaggacgaggctgattattactgcagctcatatacaagcagcagcactttc
+>IGLV2-23*01
+cagtctgccctgactcagcctgcctccgtgtctgggtctcctggacagtcgatcaccatctcctgcactggaaccagcagtgatgttgggagttataaccttgtctcctggtaccaacagcacccaggcaaagcccccaaactcatgatttatgagggcagtaagcggccctcaggggtttctaatcgcttctctggctccaagtctggcaacacggcctccctgacaatctctgggctccaggctgaggacgaggctgattattactgctgctcatatgcaggtagtagcactttac
+>IGLV2-33*01
+caatctgccctgactcagcctccttttgtgtccggggctcctggacagtcggtcaccatctcctgcactggaaccagcagtgacgttggggattatgatcatgtcttctggtaccaaaagcgtctcagcactacctccagactcctgatttacaatgtcaatactcggccttcagggatctctgacctcttctcaggctccaagtctggcaacatggcttccctgaccatctctgggctcaagtccgaggttgaggctaattatcactgcagcttatattcaagtagttacactttc
+>IGLV2-34*01
+cagtctgttctgactcagcctcgctcagtgtccaggtctcctggacagtaggttactatcttctgcactggaaccagcagtgacattgggggttatgaccttgtctcctggtgccagtagcacccaggcaaagcccccaaactcatgatttatgatgtcgctaattggccctcaggggcccctggttgcttctctggctccaagtctggcaacacggcctccctgaccatctctgggctccaggctgaggacgaggctgattattactgcagctcatatgcaggcagctacaatttc
+>IGLV2-5*01
+cagtctgccctgattcagcctccctccgtgtccgggtctcctggacagtcagtcaccatctcctgcactggaaccagcagtgatgttgggagttatgactatgtctcctggtaccaacagcacccaggcacagtccccaaacccatgatctacaatgtcaatactcagccctcaggggtccctgatcgtttctctggctccaagtctggcaatacggcctccatgaccatctctggactccaggctgaggacgaggctgattattagtgctgctcatatacaagcagtgccacttaac
+>IGLV2-8*01
+cagtctgccctgactcagcctccctccgcgtccgggtctcctggacagtcagtcaccatctcctgcactggaaccagcagtgacgttggtggttataactatgtctcctggtaccaacagcacccaggcaaagcccccaaactcatgatttatgaggtcagtaagcggccctcaggggtccctgatcgcttctctggctccaagtctggcaacacggcctccctgaccgtctctgggctccaggctgaggatgaggctgattattactgcagctcatatgcaggcagcaacaatttc
+>IGLV2-NL1*01
+cagtctgttctgactcagcctcgctcagtgtccaggtctcctggacagtaggttactatcttctgcactggaaccagcagtgacattgggggttatgaccttgtctcctggtgccagtagcacccaggcaaagcccccaaactcatgatttatgatgtcggtaattggccctcaggggcccctggttgcttctctggctccaagtctggcaacacggcctccctgaccatctctgggctccaggctgaggacgaggctgattattactgcagctcatatgcaggcagctacaatttc
+>IGLV3-1*01
+tcctatgagctgactcagccaccctcagtgtccgtgtccccaggacagacagccagcatcacctgctctggagataaattgggggataaatatgcttgctggtatcagcagaagccaggccagtcccctgtgctggtcatctatcaagatagcaagcggccctcagggatccctgagcgattctctggctccaactctgggaacacagccactctgaccatcagcgggacccaggctatggatgaggctgactattactgtcaggcgtgggacagcagcactgca
+>IGLV3-10*01
+tcctatgagctgacacagccaccctcggtgtcagtgtccccaggacaaacggccaggatcacctgctctggagatgcattgccaaaaaaatatgcttattggtaccagcagaagtcaggccaggcccctgtgctggtcatctatgaggacagcaaacgaccctccgggatccctgagagattctctggctccagctcagggacaatggccaccttgactatcagtggggcccaggtggaggatgaagctgactactactgttactcaacagacagcagtggtaatcatag
+>IGLV3-12*01
+tcctatgagctgactcagccacactcagtgtcagtggccacagcacagatggccaggatcacctgtgggggaaacaacattggaagtaaagctgtgcactggtaccagcaaaagccaggccaggaccctgtgctggtcatctatagcgatagcaaccggccctcagggatccctgagcgattctctggctccaacccagggaacaccaccaccctaaccatcagcaggatcgaggctggggatgaggctgactattactgtcaggtgtgggacagtagtagtgatcatcc
+>IGLV3-13*01
+tcctatgagctgacacagccacccgcggtgtcagtgtccccaggacagacagccaggatcagctgctctggagatgtactgagggataattatgctgactggtacccgcaaaagccaggccaggcccctgtgctggtgatatataaagatggtgagcggccctctggaatccctgagcgattctctgggtccacctcagggaacacaaccgccctgaccattagcagggtcctgaccaaaggcggggctgactattactgtttttctggtgattagaacaatct
+>IGLV3-16*01
+tcctatgagctgacacagccaccctcggtgtcagtgtccctaggacagatggccaggatcacctgctctggagaagcattgccaaaaaaatatgcttattggtaccagcagaagccaggccagttccctgtgctggtgatatataaagacagcgagaggccctcagggatccctgagcgattctctggctccagctcagggacaatagtcacattgaccatcagtggagtccaggcagaagacgaggctgactattactgtctatcagcagacagcagtggtacttatcc
+>IGLV3-19*01
+tcttctgagctgactcaggaccctgctgtgtctgtggccttgggacagacagtcaggatcacatgccaaggagacagcctcagaagctattatgcaagctggtaccagcagaagccaggacaggcccctgtacttgtcatctatggtaaaaacaaccggccctcagggatcccagaccgattctctggctccagctcaggaaacacagcttccttgaccatcactggggctcaggcggaagatgaggctgactattactgtaactcccgggacagcagtggtaaccatct
+>IGLV3-21*01
+tcctatgtgctgactcagccaccctcggtgtcagtggccccaggaaagacggccaggattacctgtgggggaaacaacattggaagtaaaagtgtgcactggtaccagcagaagccaggccaggcccctgtgctggtcgtctatgatgatagcgaccggccctcagggatccctgagcgattctctggctccaactctgggaacacggccaccctgaccatcagcagggtcgaagccggggatgaggccgactattactgtcaggtgtgggatagtagtagtgatcatcc
+>IGLV3-22*01
+tcctatgagctgacacagctaccctcggtgtcagtgtccccaggacagacagccaggatcacctgctctggagatgtactgggggaaaattatgctgactggtaccagcagaagccaggccaggcccctgagttggtgatatacgaagatagtgagcggtaccctggaatccctgaacgattctctgggtccacctcagggaacacgaccaccctgaccatcagcagggtcctgaccgaagacgaggctgactattactgtttgtctggggatgaggacaatcc
+>IGLV3-25*01
+tcctatgagctgacacagccaccctcggtgtcagtgtccccaggacagacggccaggatcacctgctctggagatgcattgccaaagcaatatgcttattggtaccagcagaagccaggccaggcccctgtgctggtgatatataaagacagtgagaggccctcagggatccctgagcgattctctggctccagctcagggacaacagtcacgttgaccatcagtggagtccaggcagaagatgaggctgactattactgtcaatcagcagacagcagtggtacttatcc
+>IGLV3-27*01
+tcctatgagctgacacagccatcctcagtgtcagtgtctccgggacagacagccaggatcacctgctcaggagatgtactggcaaaaaaatatgctcggtggttccagcagaagccaggccaggcccctgtgctggtgatttataaagacagtgagcggccctcagggatccctgagcgattctccggctccagctcagggaccacagtcaccttgaccatcagcggggcccaggttgaggatgaggctgactattactgttactctgcggctgacaacaatct
+>IGLV3-31*01
+tcctctgagctgagtcaggagcctgcagtgtctgtggccttgggatagacagccaggatcacctgccagggagacagcatagaagactccgttgtaaactggtacaagcagaagccaagccaggcccctgggctggtcatctaacttaacagtgtccagtcttcagggattcctaagaaattctctggctccagctcagggaacatggccaccctgaccatcactgggattcaggttgaagacaaggctgactattactgtcagtcatgggacagcagtcgtactcattc
+>IGLV3-32*01
+tcctctgggccaactcaggtgcctgcagtgtctgtggccttgggacaaatggccaggatcacctgccagggagacagcatggaaggctcttatgaacactggtaccagcagaagccaggccaggcccccgtgctggtcatctatgatagcagtgaccggccctcaaggatccctgagcgattctctggctccaaatcaggcaacacaaccaccctgaccatcactggggcccaggctgaggatgaggctgattattactatcagttgatagacaaccatgctac
+>IGLV3-9*01
+tcctatgagctgactcagccactctcagtgtcagtggccctgggacagacggccaggattacctgtgggggaaacaacattggaagtaaaaatgtgcactggtaccagcagaagccaggccaggcccctgtgctggtcatctatagggatagcaaccggccctctgggatccctgagcgattctctggctccaactcggggaacacggccaccctgaccatcagcagagcccaagccggggatgaggctgactattactgtcaggtgtgggacagcagcactgcacaccc
+>IGLV4-3*01
+ctgcctgtgctgactcagcccccgtctgcatctgccttgctgggagcctcgatcaagctcacctgcaccctaagcagtgagcacagcacctacaccatcgaatggtatcaacagagaccagggaggtccccccagtatataatgaaggttaagagtgatggcagccacagcaagggggacgggatccccgatcgcttcatgggctccagttctggggctgaccgctacctcaccttctccaacctccagtctgacgatgaggctgagtatcactgtggagagagccacacgattgatggccaagtcggttgagc
+>IGLV4-60*01
+cagcctgtgctgactcaatcatcctctgcctctgcttccctgggatcctcggtcaagctcacctgcactctgagcagtgggcacagtagctacatcatcgcatggcatcagcagcagccagggaaggcccctcggtacttgatgaagcttgaaggtagtggaagctacaacaaggggagcggagttcctgatcgcttctcaggctccagctctggggctgaccgctacctcaccatctccaacctccagtttgaggatgaggctgattattactgtgagacctgggacagtaacactca
+>IGLV4-69*01
+cagcttgtgctgactcaatcgccctctgcctctgcctccctgggagcctcggtcaagctcacctgcactctgagcagtgggcacagcagctacgccatcgcatggcatcagcagcagccagagaagggccctcggtacttgatgaagcttaacagtgatggcagccacagcaagggggacgggatccctgatcgcttctcaggctccagctctggggctgagcgctacctcaccatctccagcctccagtctgaggatgaggctgactattactgtcagacctggggcactggcattca
+>IGLV5-37*01
+cagcctgtgctgactcagccaccttcctcctccgcatctcctggagaatccgccagactcacctgcaccttgcccagtgacatcaatgttggtagctacaacatatactggtaccagcagaagccagggagccctcccaggtatctcctgtactactactcagactcagataagggccagggctctggagtccccagccgcttctctggatccaaagatgcttcagccaatacagggattttactcatctccgggctccagtctgaggatgaggctgactattactgtatgatttggccaagcaatgcttct
+>IGLV5-39*01
+cagcctgtgctgactcagccaacctccctctcagcatctcctggagcatcagccagattcacctgcaccttgcgcagtggcatcaatgttggtacctacaggatatactggtaccagcagaagccagggagtcttccccggtatctcctgaggtacaaatcagactcagataagcagcagggctctggagtccccagccgcttctctggatccaaagatgcttcaaccaatgcaggccttttactcatctctgggctccagtctgaagatgaggctgactattactgtgccatttggtacagcagcacttct
+>IGLV5-45*01
+caggctgtgctgactcagccgtcttccctctctgcatctcctggagcatcagccagtctcacctgcaccttgcgcagtggcatcaatgttggtacctacaggatatactggtaccagcagaagccagggagtcctccccagtatctcctgaggtacaaatcagactcagataagcagcagggctctggagtccccagccgcttctctggatccaaagatgcttcggccaatgcagggattttactcatctctgggctccagtctgaggatgaggctgactattactgtatgatttggcacagcagcgcttct
+>IGLV5-48*01
+cagcctgtgctgactcagccaacttccctctcagcatctcctggagcatcagccagactcacctgcaccttgcgcagtggcatcaatcttggtagctacaggatattctggtaccagcagaagccagagagccctccccggtatctcctgagctactactcagactcaagtaagcatcagggctctggagtccccagccgcttctctggatccaaagatgcttcgagcaatgcagggattttagtcatctctgggctccagtctgaggatgaggctgactattactgtatgatttggcacagcagtgcttct
+>IGLV5-52*01
+cagcctgtgctgactcagccatcttcccattctgcatcttctggagcatcagtcagactcacctgcatgctgagcagtggcttcagtgttggggacttctggataaggtggtaccaacaaaagccagggaaccctccccggtatctcctgtactaccactcagactccaataagggccaaggctctggagttcccagccgcttctctggatccaacgatgcatcagccaatgcagggattctgcgtatctctgggctccagcctgaggatgaggctgactattactgtggtacatggcacagcaactctaagactca
+>IGLV6-57*01
+aattttatgctgactcagccccactctgtgtcggagtctccggggaagacggtaaccatctcctgcacccgcagcagtggcagcattgccagcaactatgtgcagtggtaccagcagcgcccgggcagttcccccaccactgtgatctatgaggataaccaaagaccctctggggtccctgatcggttctctggctccatcgacagctcctccaactctgcctccctcaccatctctggactgaagactgaggacgaggctgactactactgtcagtcttatgatagcagcaatca
+>IGLV7-43*01
+cagactgtggtgactcaggagccctcactgactgtgtccccaggagggacagtcactctcacctgtgcttccagcactggagcagtcaccagtggttactatccaaactggttccagcagaaacctggacaagcacccagggcactgatttatagtacaagcaacaaacactcctggacccctgcccggttctcaggctccctccttgggggcaaagctgccctgacactgtcaggtgtgcagcctgaggacgaggctgagtattactgcctgctctactatggtggtgctcag
+>IGLV7-46*01
+caggctgtggtgactcaggagccctcactgactgtgtccccaggagggacagtcactctcacctgtggctccagcactggagctgtcaccagtggtcattatccctactggttccagcagaagcctggccaagcccccaggacactgatttatgatacaagcaacaaacactcctggacacctgcccggttctcaggctccctccttgggggcaaagctgccctgaccctttcgggtgcgcagcctgaggatgaggctgagtattactgcttgctctcctatagtggtgctcgg
+>IGLV8-61*01
+cagactgtggtgacccaggagccatcgttctcagtgtcccctggagggacagtcacactcacttgtggcttgagctctggctcagtctctactagttactaccccagctggtaccagcagaccccaggccaggctccacgcacgctcatctacagcacaaacactcgctcttctggggtccctgatcgcttctctggctccatccttgggaacaaagctgccctcaccatcacgggggcccaggcagatgatgaatctgattattactgtgtgctgtatatgggtagtggcatttc
+>IGLV8/OR8-1*01
+cagtctgtggtgacccaggagccatcactctcagggtctcctggagggacggtcacactcacctgtgccctgagctctggctcagtctctaccagtcactaccccaggtggtaccagcagaccccaggccaggctccacacatgctcatctgcagcccaaacacctgcccttctggggtccctggtcgcttctctggctccatccttgggaacaaagctgccctcaccatcacggggactcaggtagatgatgactctgatcattactgtgtgctgtacatgggtagtggcaat
+>IGLV9-49*01
+cagcctgtgctgactcagccaccttctgcatcagcctccctgggagcctcggtcacactcacctgcaccctgagcagcggctacagtaattataaagtggactggtaccagcagagaccagggaagggcccccggtttgtgatgcgagtgggcactggtgggattgtgggatccaagggggatggcatccctgatcgcttctcagtcttgggctcaggcctgaatcggtacctgaccatcaagaacatccaggaagaggatgagagtgactaccactgtggggcagaccatggcagtgggagcaacttcgtgtaacc
diff --git a/ig_simulator.py b/ig_simulator.py
new file mode 100755
index 00000000..4371b2da
--- /dev/null
+++ b/ig_simulator.py
@@ -0,0 +1,248 @@
+#!/usr/bin/env python2
+
+import os
+import sys
+import init
+import logging
+import shutil
+import ntpath
+
+import process_cfg
+import support
+import argparse
+from argparse import ArgumentParser
+
+home_directory = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
+cdr_labeler_config_dir = os.path.join(home_directory, "configs", "cdr_labeler")
+vj_finder_config_dir = os.path.join(home_directory, "configs", "vj_finder")
+ig_simulator_config_dir = os.path.join(home_directory, "configs", "ig_simulator")
+ig_simulator_bin = os.path.join(home_directory, "build", "release", "bin", "ig_simulator")
+data_annotation_dir = os.path.join(home_directory, "data/annotation")
+
+test_dir = os.path.join(home_directory, "ig_simulator_test")
+
+tool_name = "IgSimulator"
+
+
+def CheckBinariesExistance(params, log):
+ if not os.path.exists(ig_simulator_bin):
+ log.info("ERROR: Binary files were not found. Please compile " + tool_name + " before running.")
+ sys.exit(1)
+
+
+def TreeStrategyCorrect(tree_strategy):
+ return tree_strategy in ["uniform", "wide", "deep"]
+
+
+def LociParamCorrect(loci):
+ return loci in ["IGH", "IGK", "IGL"]
+
+
+def CheckParamsCorrectness(params, log):
+ if not LociParamCorrect(params.loci):
+ log.info("Loci " + params.loci + " is not recognized")
+ sys.exit(1)
+ if not TreeStrategyCorrect(params.tree_strategy):
+ log.info("Tree Strategy " + params.tree_strategy + " is not recognized")
+ sys.exit(1)
+
+
+def SetOutputParams(params, log):
+ params.output_dir = os.path.abspath(params.output_dir)
+ params.output_config_dir = os.path.join(params.output_dir, "configs")
+
+
+def PrepareOutputDir(params):
+ if os.path.exists(params.output_dir):
+ shutil.rmtree(params.output_dir)
+ os.makedirs(params.output_dir)
+
+
+def check_positive(value):
+ ivalue = int(value)
+ if ivalue <= 0:
+ raise argparse.ArgumentTypeError("%s is an invalid positive int value" % value)
+ return ivalue
+
+
+def parse_args():
+ parser = ArgumentParser(description="== " + tool_name + ": a tool for simulating antibody repertoires, clonal lineages and trees ==",
+ epilog="In case you have troubles running " + tool_name + ", you can write to igtools_support@googlegroups.com."
+ "Please provide us with igsimulator.log file from the output directory.",
+ add_help=False)
+ req_args = parser.add_argument_group("Required params")
+ output_args = req_args.add_mutually_exclusive_group(required=True)
+ output_args.add_argument("-o", "--output",
+ type=str,
+ default="",
+ dest="output_dir",
+ help="Output directory")
+
+ output_args.add_argument("--test",
+ action="store_const",
+ const=test_dir,
+ dest="output_dir",
+ help="Running in test mode")
+
+ optional_args = parser.add_argument_group("Optional arguments")
+
+ optional_args.add_argument("-l", "--loci",
+ type=str,
+ default="IGH",
+ dest="loci",
+ help="Loci: IGH, IGK, IGL" # ", TRA, TRB, TRG, TRD, TR (all TCRs) or all. "
+ "[default: %(default)s]")
+
+ optional_args.add_argument("-s", "--tree_strategy",
+ type=str,
+ default="deep",
+ dest="tree_strategy",
+ help="Tree strategy to use: uniform, wide, deep [default: %(default)s]")
+
+ optional_args.add_argument("-n", "--n_metaroots",
+ type=check_positive,
+ default=10,
+ dest="number_of_metaroots")
+
+ optional_args.add_argument("-h", "--help",
+ action="help",
+ help="Help message and exit")
+ return parser.parse_args()
+
+
+def get_logger():
+ log = logging.getLogger(tool_name)
+ log.setLevel(logging.DEBUG)
+ console = logging.StreamHandler(sys.stdout)
+ console.setFormatter(logging.Formatter('%(message)s'))
+ console.setLevel(logging.DEBUG)
+ log.addHandler(console)
+ return log
+
+
+def add_log_handler(params, log):
+ # log file
+ params.log_filename = os.path.join(params.output_dir, "ig_simulator.log")
+ if os.path.exists(params.log_filename):
+ os.remove(params.log_filename)
+ log_handler = logging.FileHandler(params.log_filename, mode='a')
+ log.addHandler(log_handler)
+ return log
+
+
+def PrintParams(params, log):
+ log.info(tool_name + " parameters:")
+ log.info(" Output directory:\t" + params.output_dir + "\n")
+ log.info(" Loci:\t\t\t" + params.loci)
+ log.info(" # of metaroots:\t\t" + str(params.number_of_metaroots) + "\n")
+ log.info(" Tree strategy:\t\t" + params.tree_strategy + "\n")
+
+########################################################################################################################
+
+
+def CopyConfigs(params, log):
+ if os.path.exists(params.output_config_dir):
+ shutil.rmtree(params.output_config_dir)
+ params.cdr_labeler_config_dir = os.path.abspath(os.path.join(params.output_config_dir, "cdr_labeler"))
+ params.cdr_labeler_config_filename = os.path.join(params.cdr_labeler_config_dir, "config.info")
+
+ params.vj_finder_config_dir = os.path.abspath(os.path.join(params.output_config_dir, "vj_finder"))
+ params.vj_finder_config_filename = os.path.join(params.vj_finder_config_dir, "config.info")
+
+ shutil.copytree(ig_simulator_config_dir, params.output_config_dir)
+ shutil.copytree(cdr_labeler_config_dir, params.cdr_labeler_config_dir)
+ shutil.copytree(vj_finder_config_dir, params.vj_finder_config_dir)
+
+ params.output_config_file = os.path.join(params.output_config_dir, "config.info")
+ if not os.path.exists(params.output_config_file):
+ log.info("ERROR: Config file " + params.output_config_file + " was not found")
+ sys.exit(1)
+
+
+def ModifyParamsWrtOrganism(params, cdr_param_dict):
+ params.organism = "human"
+ cdr_param_dict['imgt_v_annotation'] = os.path.join(data_annotation_dir, params.organism + "_v_imgt.txt")
+ cdr_param_dict['kabat_v_annotation'] = os.path.join(data_annotation_dir, params.organism + "_v_kabat.txt")
+ cdr_param_dict['imgt_j_annotation'] = os.path.join(data_annotation_dir, params.organism + "_j_imgt.txt")
+ cdr_param_dict['kabat_j_annotation'] = os.path.join(data_annotation_dir, params.organism + "_j_kabat.txt")
+ return cdr_param_dict
+
+
+def ModifyConfigFiles(params, log):
+ igs_params_dict = dict()
+ igs_params_dict['output_dir'] = params.output_dir
+ igs_params_dict['loci'] = params.loci
+ igs_params_dict['number_of_metaroots'] = params.number_of_metaroots
+ igs_params_dict['pool_manager_strategy'] = params.tree_strategy
+ igs_params_dict['germline_dir'] = os.path.join(home_directory, "data/germline")
+ igs_params_dict['cdr_labeler_config_filename'] = params.cdr_labeler_config_filename
+
+ cdr_params_dict = dict()
+ cdr_params_dict['vj_finder_config'] = params.vj_finder_config_filename
+
+ vjf_params_dict = dict()
+ params.germline_config_file = os.path.join(params.vj_finder_config_dir, "germline_files_config.txt")
+ vjf_params_dict['germline_filenames_config'] = params.germline_config_file
+ vjf_params_dict['germline_dir'] = os.path.join(home_directory, "data/germline")
+ igs_params_dict['germline_filenames_config'] = params.germline_config_file
+
+ cdr_params_dict = ModifyParamsWrtOrganism(params, cdr_params_dict)
+ process_cfg.substitute_params(params.output_config_file, igs_params_dict, log)
+ process_cfg.substitute_params(params.cdr_labeler_config_filename, cdr_params_dict, log)
+ process_cfg.substitute_params(params.vj_finder_config_filename, vjf_params_dict, log)
+
+
+def PrepareConfigs(params, log):
+ CopyConfigs(params, log)
+ ModifyConfigFiles(params, log)
+
+
+def RunTool(params, log):
+ try:
+ igs_command_line = ig_simulator_bin + " " + \
+ params.output_config_file
+ support.sys_call(igs_command_line, log)
+ log.info("\nThank you for using " + tool_name + "!\n")
+ except (KeyboardInterrupt):
+ log.info("\n" + tool_name + " was interrupted!")
+ except Exception:
+ exc_type, exc_value, _ = sys.exc_info()
+ if exc_type == SystemExit:
+ sys.exit(exc_value)
+ else:
+ log.exception(exc_value)
+ log.info("\nERROR: Exception caught.")
+ except BaseException:
+ exc_type, exc_value, _ = sys.exc_info()
+ if exc_type == SystemExit:
+ sys.exit(exc_value)
+ else:
+ log.exception(exc_value)
+ log.info("\nERROR: Exception caught.")
+
+
+def main(argv):
+ log = get_logger()
+ params = parse_args()
+ print(params)
+ CheckBinariesExistance(params, log)
+ CheckParamsCorrectness(params, log)
+ SetOutputParams(params, log)
+
+ PrepareOutputDir(params)
+ log = add_log_handler(params, log)
+
+ # print command line
+ command_line = "Command_line: "
+ command_line += " ".join(argv)
+ log.info(command_line + "\n")
+ PrintParams(params, log)
+ log.info("Log will be written to " + params.log_filename + "\n")
+
+ PrepareConfigs(params, log)
+
+ RunTool(params, log)
+ log.info("Log was written to " + params.log_filename)
+
+if __name__ == '__main__':
+ main(sys.argv)
diff --git a/ig_simulator_manual.html b/ig_simulator_manual.html
new file mode 100644
index 00000000..edd60154
--- /dev/null
+++ b/ig_simulator_manual.html
@@ -0,0 +1,255 @@
+
+ IgSimulator 2.0.alpha Manual
+
+
+
+
+
+IgSimulator 2.0.alpha manual
+
+1. What is IgSimulator?
+
+2. Installation
+ 2.1. Verifying your installation
+
+3. IgSimulator usage
+ 3.1. Basic options
+ 3.2. Advanced options
+ 3.3. Examples
+ 3.4. Output files
+
+4. Output file formats
+ 4.1. Base repertoire fasta
+ 4.2. Base repertoire info
+ 4.3. Full and filtered pool fasta
+ 4.4. Clonal Trees files
+
+
+
+5. Feedback and bug reports
+
+
+
+1. What is IgSimulator?
+
+ IgSimulator is a tool for simulation of antibody repertoires, clonal lineages and clonal trees.
+ It performs the following steps:
+
+ - simulates metaroots — each as a result of certain V(D)J recombination,
+ - for each metaroot, chooses the number of clonal trees that will be simulated at the 3rd step with this metaroot as their root,
+ - for each metaroot simulates clonal trees imitating evolutionary process and clonal selection.
+
+
+
+Some vertices of a clonal tree are marked absent to imitate evolutionary process.
+
+
+
+2. Installation
+
+IgSimulator has the following dependencies:
+
+ - 64-bit Linux or MacOS system,
+ - g++ (version 4.7 or higher) or clang compiler,
+ - Cmake (version 2.8.8 or higher),
+ - Python 2.7.
+
+
+To assemble IgSimulator, type
+
+
+ make
+
+
+
+To install IgSimulator (after the previous step) type
+
+
+ make install
+
+
+
+If you want to install IgSimulator to a particular path $YOUR_PATH
, type
+
+
+ make install prefix=$YOUR_PATH
+
+
+
+
+2.1. Verifying your installation
+
+► To try IgSimulator, run:
+
+ ./ig_simulator.py --test
+
+
+
+The test run should take no more than several seconds.
+If the installation of IgSimulator is successful, you will find the following information at the end of the log:
+
+
+
+ Thank you for using IgSimulator!
+ Log was written to <your_installation_dir>/ig_simulator_test/ig_simulator.log
+
+
+
+
+3. IgSimulator usage
+
+To run IgSimulator, type:
+
+
+ ./ig_simulator.py [options] -o <output_dir>
+
+
+
+
+3.1. Basic options
+
+-o / --output <output_dir>
+output directory (required).
+
+
+
+--test
+Running at default parameters at a test directory.
+Command line corresponding to the test run is equivalent to the following:
+
+
+ ./ig_simulator.py -o ig_simulator_test
+
+
+
+--help
+Printing help.
+
+
+
+
+3.2. Advanced options
+
+-l / --loci <str>
+Immunological loci to simulate V(D)J-recombination.
+Available values are IGH
/ IGL
/ IGK
.
+Default value is IGH
.
+
+
+
+-n / --n_metaroots <int>
+Number of metaroots (results of V(D)J-recombinations) to simulate.
+Default value is 10
.
+
+
+
+-s / --tree_strategy <str>
+Strategy to simulate clonal trees.
+Available values are deep
/ wide
/ uniform
.
+Default value is deep
.
+
+
+
+3.3. Examples
+To perform simulation of 50
metaroots with clonal tree simulation strategy uniform
+and output to ig_simulator_test
directory, run
+
+
+ ./ig_simulator.py -n 50 -s uniform -o ig_simulator_test
+
+
+
+
+
+3.4. Output files
+IgSimulator creates a working directory (whose name was specified using option -o
)
+and outputs the following files and directories there:
+
+
+ -
+ base_repertoire.fasta — simulated metaroots — results of V(D)J recombination
+ (Description).
+
+ -
+ base_repertoire.info — detailed information about V(D)J recombination for each metaroot
+ (Description).
+
+ -
+ filtered_pool.fasta, full_pool.fasta — simulated repertoire with certain clones filtered out (imitation of clonal selection)
+ and full repertoire without any filtration
+ (Description).
+
+
+
+
+ -
+ trees_dir — directory that contains ready-to-draw dot files for all simulated clonal trees
+ (Description).
+
+
+
+
+ -
+ ig_simulator.log — a full log of IgSimulator tool.
+
+
+
+
+4. Output file formats
+
+4.1. Base repertoire fasta
+base_repertoire.fasta presents all simulated metaroots in fasta format.
+Id of each metaroot matches the pattern forest_X_multiplicity_Y
where X
is a zero-based number of the metaroot (max is param -n
minus one)
+and Y
is the number of trees that are simulated with this metaroot as a root.
+
+4.2. Base repertoire info
+base_repertoire.info presents the following information about each simulated metaroot
+
+ Index
corresponds to the index in the id of the metaroot in the base_repertoire.fasta
(here).
+ V/D/J names and sequences
— names of gene segments and their sequences that form the metaroot.
+ Cleavage in V/D(left)/D(right)/J gene
— the length of the cleavage. A negative cleavage represents a palindrome insertion.
+ Insertion in VD/DJ junction
— non-genomic insertions.
+ CDR1/2/3 positions and sequences
— zero-based positions of CDRegions in the metaroot sequence and corresponding sequences themselves.
+
+
+4.3. Full and filtered pool fasta
+full_pool.fasta presents the full pool of all sequences that are simulated.
+Id of each sequence matches the pattern forest_X_tree_Y_antibody_Z
where
+
+ X
is a zero-based number of the metaroot (max is param -n
minus one),
+ Y
is a zero-based number of the clonal tree that the sequence is a vertex of,
+ Z
is a zero-based number of the sequence in the Y
th clonal tree.
+
+
+Due to clonal selection a certain number of sequences is absent from the real repertoire.
+filtered_pool.fasta presents sequences in the same format as full_pool.fasta.
+However, the former is a subset of the latter.
+
+4.4. Clonal trees files
+
+Each file in the directory trees_dir
represents a certain simulated clonal tree in ready-to-draw dot
format.
+The name of each file matches the pattern forest_X_tree_Y.dot
where
+
+ X
is a zero-based number of the metaroot (max is param -n
minus one),
+ Y
is a zero-based number of the clonal tree among those possessing the metaroot as their root.
+
+
+The id of each vertex is the Z
defined here.
+Productive/non-productive sequences are shaped as circles/rectangles.
+Absent seqs (that are present only in full_pool.fasta but not in the filtered_pool.fasta) are colored in magenta.
+Additionally, each dot file contains comments about simulated SHMs.
+
+
+
+5. Feedback and bug reports
+Your comments, bug reports, and suggestions are very welcome.
+They will help us to further improve IgSimulator.
+
+If you have any trouble running IgSimulator, please send us the log file from the output directory.
+
+Address for communications: igtools_support@googlegroups.com.
diff --git a/py/ig_simulator_tools/naive_tree_simulator.py b/py/ig_simulator_tools/naive_tree_simulator.py
new file mode 100644
index 00000000..dbdbb3b3
--- /dev/null
+++ b/py/ig_simulator_tools/naive_tree_simulator.py
@@ -0,0 +1,179 @@
+from __future__ import division
+import os
+import errno
+
+from Bio import SeqIO
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+import numpy as np
+import random
+
+
+def smart_makedirs(dirname):
+ try:
+ os.makedirs(dirname)
+ except OSError as exc:
+ if exc.errno != errno.EEXIST:
+ raise exc
+
+
+def random_mutations(antibody, pois_p=1):
+ antibody = list(antibody)
+ n_mut = np.random.poisson(pois_p, 1)[0] + 1 # at least one mutation
+ mut_ind = np.random.choice(range(len(antibody)), size=n_mut, replace=False)
+ mut_ind = [i for i in mut_ind if antibody[i] != 'N']
+
+ def rand_nucl(nucl):
+ bases = list("ACGT")
+ bases.remove(nucl)
+ return bases[np.random.randint(3)]
+
+ mut_ind = [(i, rand_nucl(antibody[i])) for i in mut_ind]
+
+ for i, nucl in mut_ind:
+ antibody[i] = nucl
+
+ return "".join(antibody), mut_ind
+
+
+class ParsedRecord(object):
+ def __init__(self, record):
+ record.name = str(record.name)
+ self.name, self.multiplicity = [int(x) for x in record.name.split('_') if x.isdigit()]
+ self.metaroot_seq = str(record.seq)
+
+
+class Node(object):
+ def __init__(self, seq, numb):
+ self.seq = seq
+ self.children = []
+ self.numb = numb
+ self.included = True
+
+
+def generate_tree(seq, n, ret_prob, pois_p):
+ pool = []
+ root = Node(seq, 0)
+ pool.append(root)
+ indeces = []
+ weights = []
+ for i in xrange(n):
+ indeces.append(i)
+ weights.append(1)
+ index = np.random.choice(indeces, size=1,
+ p=np.array(weights) / sum(weights))[0]
+ weights[index] += 1
+ e = pool[index]
+ mut_seq, mutations = random_mutations(e.seq, pois_p)
+ mut_e = Node(mut_seq, i + 1)
+ e.children.append((mut_e, mutations))
+ bern = np.random.binomial(1, ret_prob, 1)[0]
+ if not bern:
+ e.included = False
+ pool.append(mut_e)
+
+ results = {'root': root}
+ results['all_seqs'] = [(id, x.seq) for id, x in enumerate(pool)]
+ results['filtered_seqs'] = [(id, x.seq) for id, x in enumerate(pool) if x.included]
+ results['edge_list'] = edge_list(root)
+ return results
+
+
+def edge_list(root):
+ elist = [(root.numb, x.numb, mut) for x, mut in root.children]
+ for x, mut in root.children:
+ elist += edge_list(x)
+ return elist
+
+
+def go(records, lamb, ret_prob, pois_p):
+ records = [ParsedRecord(record) for record in records]
+ results = []
+ for i, record in enumerate(records):
+ print(i + 1, len(records))
+ results.append([generate_tree(record.metaroot_seq,
+ np.random.geometric(lamb, size=1)[0],
+ ret_prob=ret_prob,
+ pois_p=pois_p)
+ for _ in xrange(record.multiplicity)])
+ return results
+
+
+def output_forests(forests, output_folder = ""):
+ all_records, filtered_records = [], []
+
+ def create_records(x):
+ return [SeqRecord(Seq(seq),
+ id="metaroot_%d_tree_%d_id_%d" % (i, m, id),
+ description="")
+ for id, seq in x]
+
+ for i, forest in enumerate(forests):
+ for m, tree in enumerate(forest):
+ all_records += create_records(tree['all_seqs'])
+ filtered_records += create_records(tree['filtered_seqs'])
+
+ smart_makedirs(output_folder)
+ SeqIO.write(filtered_records, os.path.join(output_folder, "filtered_records.fasta"), "fasta")
+ SeqIO.write(all_records, os.path.join(output_folder, "all_records.fasta"), "fasta")
+
+ edge_lists_dir = os.path.join(output_folder, "trees_edge_lists")
+ smart_makedirs(edge_lists_dir)
+ for i, forest in enumerate(forests):
+ for m, tree in enumerate(forest):
+ with open(os.path.join(edge_lists_dir, "antibody_%d_tree_%d.txt" %(i, m)), "w") as f:
+ for tup in tree['edge_list']:
+ f.write("%d %d %s\n" % tup)
+
+
+def ParseCommandLineParams():
+ import argparse
+ current_dir = os.path.dirname(os.path.realpath(__file__))
+ parser = argparse.ArgumentParser()
+ parser.add_argument("-o", "--outdir",
+ type=str,
+ default=current_dir,
+ help="Output directory")
+ root_dir = os.path.realpath(os.path.join(current_dir, "../../"))
+ input_file = os.path.join(root_dir, "ig_simulator_test/test.fa")
+ parser.add_argument("-i", "--input",
+ type=str,
+ default=input_file)
+ parser.add_argument("--seed",
+ type=int,
+ default=int(np.random.randint(low=0, high=100000, size=1)[0]))
+ parser.add_argument("-s", "--exp_tree_size",
+ type=float,
+ default=0.01,
+ help="Mean tree size")
+ parser.add_argument("-r", "--ret_prob",
+ type=float,
+ default=0.5,
+ help="Probability to return chosen antibody to the pool")
+ parser.add_argument("-p", "--pois_p",
+ type=float,
+ default=1.,
+ help="Pois parameter for number of mut")
+ return parser.parse_args()
+
+
+def dump_params(params):
+ import json
+ smart_makedirs(params.outdir)
+ with open(os.path.join(params.outdir, "params.txt"), 'w') as f:
+ json.dump(vars(params), f, sort_keys=True, indent=4)
+
+
+def main():
+ params = ParseCommandLineParams()
+ dump_params(params)
+ np.random.seed(params.seed)
+ with open(params.input, "r") as f:
+ records = list(SeqIO.parse(f, "fasta"))
+
+ results = go(records, lamb=params.exp_tree_size, ret_prob=params.ret_prob, pois_p=params.pois_p)
+ output_forests(results, params.outdir)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a91655a2..16468cd5 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -35,6 +35,7 @@ set(ALGORITHMS_DIR "${IGREC_MAIN_SRC_DIR}/algorithms")
set(VDJ_UTILS_DIR "${IGREC_MAIN_SRC_DIR}/vdj_utils")
set(VJ_FINDER_DIR "${IGREC_MAIN_SRC_DIR}/vj_finder")
set(CDR_LABELER_DIR "${IGREC_MAIN_SRC_DIR}/cdr_labeler")
+set(IG_SIMULATOR_DIR "${IGREC_MAIN_SRC_DIR}/ig_simulator")
# Everything option-dependent
include(options)
@@ -89,3 +90,4 @@ add_subdirectory(cdr_labeler)
add_subdirectory(test)
add_subdirectory(umi_experiments)
add_subdirectory(pcr_simulator)
+add_subdirectory(ig_simulator)
diff --git a/src/cdr_labeler/cdr_launch.cpp b/src/cdr_labeler/cdr_launch.cpp
index a41ae25f..361f5cae 100644
--- a/src/cdr_labeler/cdr_launch.cpp
+++ b/src/cdr_labeler/cdr_launch.cpp
@@ -1,12 +1,13 @@
#include "cdr_launch.hpp"
#include
-#include "germline_db_generator.hpp"
+#include "germline_utils/germline_db_generator.hpp"
#include "germline_db_labeler.hpp"
#include "vj_parallel_processor.hpp"
#include "read_labeler.hpp"
#include "cdr_output.hpp"
#include "diversity_analyser.hpp"
+#include "germline_utils/germline_config.hpp"
//#include "cdr_annotator.hpp"
namespace cdr_labeler {
@@ -16,7 +17,7 @@ namespace cdr_labeler {
core::ReadArchive read_archive(config_.input_params.input_reads);
if(config_.vj_finder_config.io_params.output_params.output_details.fix_spaces)
read_archive.FixSpacesInHeaders();
- vj_finder::GermlineDbGenerator db_generator(config_.vj_finder_config.io_params.input_params.germline_input,
+ germline_utils::GermlineDbGenerator db_generator(config_.vj_finder_config.io_params.input_params.germline_input,
config_.vj_finder_config.algorithm_params.germline_params);
INFO("Generation of DB for variable segments...");
germline_utils::CustomGeneDatabase v_db = db_generator.GenerateVariableDb();
diff --git a/src/ig_simulator/CMakeLists.txt b/src/ig_simulator/CMakeLists.txt
new file mode 100644
index 00000000..a78a270f
--- /dev/null
+++ b/src/ig_simulator/CMakeLists.txt
@@ -0,0 +1,58 @@
+# Build rules for the ig_simulator static library and the ig_simulator executable.
+project(ig_simulator CXX)
+
+# Header search paths: this directory plus shared project directories.
+# The *_DIR variables are not set in this file; presumably they come from the
+# parent src/CMakeLists.txt.
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+include_directories(${SPADES_MAIN_INCLUDE_DIR})
+include_directories(${CORE_DIR})
+include_directories(${VDJ_UTILS_DIR})
+include_directories(${VJ_FINDER_DIR})
+include_directories(${CDR_LABELER_DIR})
+
+# Every simulator source except main.cpp is compiled into one static library.
+add_library(ig_simulator_library STATIC
+ ig_simulator_config.cpp
+ ig_simulator_launch.cpp
+ base_repertoire/gene_chooser/abstract_gene_chooser.cpp
+ base_repertoire/gene_chooser/uniform_gene_chooser.cpp
+ base_repertoire/gene_chooser/config_based_getter.cpp
+ base_repertoire/nucleotides_remover/abstract_nucleotides_remover.cpp
+ base_repertoire/nucleotides_remover/uniform_nucleotides_remover.cpp
+ base_repertoire/nucleotides_remover/config_based_getter.cpp
+ simulation_routines.cpp
+ base_repertoire/p_nucleotides_creator/abstract_nucleotides_creator.cpp
+ base_repertoire/p_nucleotides_creator/uniform_nucleotides_creator.cpp
+ base_repertoire/p_nucleotides_creator/config_based_getter.cpp
+ base_repertoire/metaroot_creator/metaroot_creator.cpp
+ base_repertoire/metaroot_creator/config_based_getter.cpp
+ base_repertoire/n_nucleotides_inserter/abstract_n_nucleotides_inserter.cpp
+ base_repertoire/n_nucleotides_inserter/uniform_n_nucleotides_inserter.cpp
+ base_repertoire/n_nucleotides_inserter/config_based_getter.cpp
+ base_repertoire/metaroot/metaroot.cpp
+ base_repertoire/productivity_checker/productivity_checker.cpp
+ base_repertoire/multiplicity_creator/multiplicity_creator.cpp
+ base_repertoire/base_repertoire_simulator.cpp
+ base_repertoire/metaroot_cluster/metaroot_cluster.cpp
+ base_repertoire/base_repertoire.cpp
+ clonal_trees/tree/node.cpp
+ clonal_trees/tree_creator/pool_manager.cpp
+ clonal_trees/tree_creator/cartesian_tree.cpp
+ clonal_trees/tree/tree.cpp
+ clonal_trees/forest/forest.cpp
+ clonal_trees/tree_creator/tree_creator.cpp
+ clonal_trees/tree_creator/tree_size_generator.cpp
+ clonal_trees/tree_creator/shm_creator.cpp
+ clonal_trees/tree_creator/forest_creator.cpp
+ clonal_trees/tree_creator/forest_storage_creator.cpp
+ clonal_trees/tree_creator/exporters.cpp
+ clonal_trees/fast_stop_codon_checker/fast_stop_codon_checker.cpp
+ base_repertoire/gene_chooser/custom_gene_chooser.cpp)
+
+# The library links against the vj_finder and cdr_labeler libraries, the
+# `input` target and whatever COMMON_LIBRARIES expands to (defined outside this file).
+target_link_libraries(ig_simulator_library
+ vj_finder_library
+ cdr_labeler_library
+ input
+ ${COMMON_LIBRARIES}
+ )
+
+
+# The executable is main.cpp linked against the library above.
+add_executable(ig_simulator main.cpp)
+
+target_link_libraries(ig_simulator ig_simulator_library)
diff --git a/src/ig_simulator/base_repertoire/base_repertoire.cpp b/src/ig_simulator/base_repertoire/base_repertoire.cpp
new file mode 100644
index 00000000..79886de4
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/base_repertoire.cpp
@@ -0,0 +1,24 @@
+//
+// Created by Andrew Bzikadze on 4/2/17.
+//
+
+#include "base_repertoire.hpp"
+
+namespace ig_simulator {
+
+// Writes every cluster of the repertoire to two streams: `fasta` receives a
+// ">forest_<id>_multiplicity_<m>" header followed by the metaroot sequence,
+// `info` receives the zero-based index line, the metaroot description and a
+// separator line.
+void print_base_repertoire(const BaseRepertoire& base_repertoire, std::ostream& fasta, std::ostream& info) {
+    const size_t total = base_repertoire.size();
+    for (size_t id = 0; id < total; ++id) {
+        const auto& cluster = base_repertoire[id];
+        const auto metaroot_ptr = cluster.MetarootPtr();
+
+        fasta << ">forest_" << id << "_multiplicity_" << cluster.Multiplicity() << '\n';
+        fasta << metaroot_ptr->Sequence() << '\n';
+
+        // total >= 1 inside the loop, so total - 1 cannot wrap around.
+        info << "Index (zero-based): " << id << " / " << total - 1
+             << " (" << total << ") "
+             << '\n' << *metaroot_ptr
+             <<"***************************************************************************\n\n";
+    }
+}
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/base_repertoire.hpp b/src/ig_simulator/base_repertoire/base_repertoire.hpp
new file mode 100644
index 00000000..96061b98
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/base_repertoire.hpp
@@ -0,0 +1,15 @@
+//
+// Created by Andrew Bzikadze on 4/2/17.
+//
+
+#pragma once
+
+#include "metaroot_cluster/metaroot_cluster.hpp"
+
+namespace ig_simulator {
+
+using BaseRepertoire = std::vector;  // container of clusters; print_base_repertoire walks it cluster by cluster
+
+void print_base_repertoire(const BaseRepertoire& base_repertoire, std::ostream& fasta, std::ostream& info);  // writes sequences to `fasta` and descriptions to `info`
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/base_repertoire_simulator.cpp b/src/ig_simulator/base_repertoire/base_repertoire_simulator.cpp
new file mode 100644
index 00000000..62c86013
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/base_repertoire_simulator.cpp
@@ -0,0 +1,31 @@
+//
+// Created by Andrew Bzikadze on 3/29/17.
+//
+
+#include "base_repertoire_simulator.hpp"
+
+namespace ig_simulator {
+
+BaseRepertoire BaseRepertoireSimulator::Simulate(size_t size) {  // builds a repertoire for the requested number of clusters
+ BaseRepertoire repertoire;
+ repertoire.reserve(size);
+
+ size_t productive_size = static_cast (static_cast(size) * productive_part);  // number of clusters that must be productive
+ size_t i = 0;
+ while(i < productive_size) {  // NOTE(review): never terminates if no productive metaroot is ever created
+ MetarootCluster cluster{metaroot_creator_p->Createroot(),
+ multiplicity_creator_p->RandomMultiplicity()};
+ if (cluster.MetarootPtr()->IsProductive()) {  // candidates that are not productive are dropped and a new one is drawn
+ repertoire.emplace_back(std::move(cluster));
+ i++;
+ }
+ }
+
+ for(; i < size; ++i) {  // the remaining slots accept any metaroot, productive or not
+ repertoire.emplace_back(metaroot_creator_p->Createroot(),
+ multiplicity_creator_p->RandomMultiplicity());
+ }
+ return repertoire;
+}
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/base_repertoire_simulator.hpp b/src/ig_simulator/base_repertoire/base_repertoire_simulator.hpp
new file mode 100644
index 00000000..5660c70a
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/base_repertoire_simulator.hpp
@@ -0,0 +1,45 @@
+//
+// Created by Andrew Bzikadze on 3/29/17.
+//
+
+#pragma once
+
+#include
+#include
+#include
+
+#include "ig_simulator_config.hpp"
+#include "germline_utils/germline_db_generator.hpp"
+#include "metaroot_cluster/metaroot_cluster.hpp"
+#include "metaroot_creator/config_based_getter.hpp"
+#include "multiplicity_creator/multiplicity_creator.hpp"
+#include "base_repertoire/base_repertoire.hpp"
+#include "productivity_checker/productivity_checker.hpp"
+
+namespace ig_simulator {
+
+class BaseRepertoireSimulator {  // builds base repertoires from a metaroot creator and a multiplicity creator
+private:
+ AbstractMetarootCreatorCPtr metaroot_creator_p;
+ AbstractMultiplicityCreatorPtr multiplicity_creator_p;
+ double productive_part;  // fraction of the requested size that Simulate() requires to be productive
+
+public:
+ BaseRepertoireSimulator(const IgSimulatorConfig::SimulationParams::BaseRepertoireParams& config,
+ const germline_utils::ChainType& chain_type,
+ std::vector &db):  // db is non-const because it is forwarded to get_metarootcreator, which takes it by non-const reference
+ metaroot_creator_p(get_metarootcreator(chain_type, config.metaroot_simulation_params, db)),
+ multiplicity_creator_p(get_multiplicity_creator(config.multiplicity_creator_params)),
+ productive_part(config.productive_params.productive_part)
+ { }
+
+ BaseRepertoireSimulator() = delete;
+ BaseRepertoireSimulator(const BaseRepertoireSimulator&) = delete;
+ BaseRepertoireSimulator(BaseRepertoireSimulator&&) = default;  // move construction is the only enabled copy/move operation
+ BaseRepertoireSimulator& operator=(const BaseRepertoireSimulator&) = delete;
+ BaseRepertoireSimulator& operator=(BaseRepertoireSimulator&&) = delete;
+
+ BaseRepertoire Simulate(size_t size);  // defined in base_repertoire_simulator.cpp
+};
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/gene_chooser/abstract_gene_chooser.cpp b/src/ig_simulator/base_repertoire/gene_chooser/abstract_gene_chooser.cpp
new file mode 100644
index 00000000..dc805406
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/gene_chooser/abstract_gene_chooser.cpp
@@ -0,0 +1,28 @@
+//
+// Created by Andrew Bzikadze on 3/16/17.
+//
+
+#include "abstract_gene_chooser.hpp"
+
+namespace ig_simulator {
+
+AbstractVDJGeneChooser::AbstractVDJGeneChooser(const std::vector& db):
+ v_db_p_(db.empty() ? nullptr : &db.front()),  // guarded: front()/back() on an empty vector is undefined behaviour
+ d_db_p_(nullptr),
+ j_db_p_(db.empty() ? nullptr : &db.back()),  // the initializers run before the size check in the body
+ is_vdj(false)
+{
+ VERIFY(db.size() >= 2 and db.size() <= 3);  // accepted layouts: {V, J} or {V, D, J}
+ // A middle element exists only for three-element input; it becomes the D database.
+ if (db.size() == 3) {
+ d_db_p_ = &db[1];
+ is_vdj = true;
+ }
+}
+
+bool AbstractVDJGeneChooser::IsVDJ() const {  // true when the constructor received a three-element database vector
+ // Deliberately not `inline`: the header only declares this member, so callers in other translation units must be able to link against this definition.
+ return is_vdj;
+}
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/gene_chooser/abstract_gene_chooser.hpp b/src/ig_simulator/base_repertoire/gene_chooser/abstract_gene_chooser.hpp
new file mode 100644
index 00000000..f2578ee0
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/gene_chooser/abstract_gene_chooser.hpp
@@ -0,0 +1,50 @@
+//
+// Created by Andrew Bzikadze on 3/16/17.
+//
+
+#pragma once
+
+#include
+#include
+
+#include "germline_utils/germline_databases/custom_gene_database.hpp"
+#include "ig_simulator_utils.hpp"
+
+namespace ig_simulator {
+
+using VDJ_GenesIndexTuple = std::tuple;
+
+class AbstractVDJGeneChooser {  // interface for strategies that pick V, optionally D, and J gene indices
+protected:
+ const germline_utils::CustomGeneDatabase *v_db_p_;
+ const germline_utils::CustomGeneDatabase *d_db_p_;
+ const germline_utils::CustomGeneDatabase *j_db_p_;
+
+ // This variable defines whether D segment is generated
+ // d_db_p_ MUST be nullptr if is_vdj == false
+ bool is_vdj;
+
+public:
+ explicit AbstractVDJGeneChooser(const std::vector& db);  // db must hold two or three databases (checked in the .cpp)
+
+ virtual VDJ_GenesIndexTuple ChooseGenes() const = 0;
+
+ /**
+ * This method reports whether D segment is generated.
+ * If `false` then second component of `VDJ_GenesIndexTuple`
+ * returned by `ChooseGenes()` will be size_t(-1).
+ */
+ bool IsVDJ() const;
+
+ AbstractVDJGeneChooser() = delete;
+ AbstractVDJGeneChooser(const AbstractVDJGeneChooser&) = delete;
+ AbstractVDJGeneChooser(AbstractVDJGeneChooser&&) = delete;
+ AbstractVDJGeneChooser& operator=(const AbstractVDJGeneChooser&) = delete;
+ AbstractVDJGeneChooser& operator=(AbstractVDJGeneChooser&&) = delete;
+
+ virtual ~AbstractVDJGeneChooser() { }
+};
+
+using AbstractVDJGeneChooserCPtr = std::unique_ptr;
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/gene_chooser/config_based_getter.cpp b/src/ig_simulator/base_repertoire/gene_chooser/config_based_getter.cpp
new file mode 100644
index 00000000..d3f5b7cb
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/gene_chooser/config_based_getter.cpp
@@ -0,0 +1,24 @@
+//
+// Created by Andrew Bzikadze on 3/31/17.
+//
+
+#include "config_based_getter.hpp"
+#include "uniform_gene_chooser.hpp"
+#include "custom_gene_chooser.hpp"
+
+
+namespace ig_simulator {
+
+AbstractVDJGeneChooserCPtr get_gene_chooser(const GeneChooserParams& config,
+ const std::vector& db)
+{
+ if (config.method == GeneChooserMethod::Uniform)  // the only method wired up so far
+ return AbstractVDJGeneChooserCPtr(new UniformVDJGeneChooser(db));
+ // TODO add Custom
+ // else if (config.method == GeneChooserMethod::Custom)
+ // return AbstractVDJGeneChooserCPtr(new CustomGeneChooser(db, config.custom_gene_chooser_params));
+ VERIFY(false);  // every other method ends here; NOTE(review): no return statement follows this check
+}
+
+} // End namespace ig_simulator
+
diff --git a/src/ig_simulator/base_repertoire/gene_chooser/config_based_getter.hpp b/src/ig_simulator/base_repertoire/gene_chooser/config_based_getter.hpp
new file mode 100644
index 00000000..c4efa0c7
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/gene_chooser/config_based_getter.hpp
@@ -0,0 +1,16 @@
+//
+// Created by Andrew Bzikadze on 3/31/17.
+//
+
+#pragma once
+
+#include "abstract_gene_chooser.hpp"
+#include "ig_simulator_config.hpp"
+
+namespace ig_simulator {
+
+AbstractVDJGeneChooserCPtr
+get_gene_chooser(const GeneChooserParams& config,  // factory: the implementation is selected by config.method
+ const std::vector& db);
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/gene_chooser/custom_gene_chooser.cpp b/src/ig_simulator/base_repertoire/gene_chooser/custom_gene_chooser.cpp
new file mode 100644
index 00000000..c2d77152
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/gene_chooser/custom_gene_chooser.cpp
@@ -0,0 +1,89 @@
+//
+// Created by Andrew Bzikadze on 4/26/17.
+//
+
+#include "custom_gene_chooser.hpp"
+
+namespace ig_simulator {
+
+std::vector
+CustomGeneChooser::ReadProbabilities(const std::string& filename,  // reads one "gene name, probability" record per line of `filename`
+ const germline_utils::CustomGeneDatabase& db)
+{
+ using boost::tokenizer;
+ using boost::escaped_list_separator;
+ using Tokenizer = tokenizer>;
+
+ VERIFY(db.cbegin() + 1 == db.cend());  // `db` must contain exactly one gene type
+ germline_utils::ImmuneGeneType igtype { *db.cbegin() };
+ const germline_utils::ImmuneGeneDatabase& igdb = db.GetConstDbByGeneType(igtype);
+
+ std::ifstream in;
+ in.open(filename);
+ VERIFY(in.is_open());
+
+ std::vector probs(db.size());  // genes that the file does not mention keep their value-initialized entry
+ std::string line;
+
+ std::vector parsed_vector;
+ while (getline(in, line)) {
+ if (line.empty())  // the first empty line ends parsing
+ break;
+ Tokenizer tokenizer(line);
+ parsed_vector.assign(tokenizer.begin(), tokenizer.end());
+ VERIFY(parsed_vector.size() == 2);  // VERIFY rather than assert, matching every other check in this function
+ size_t index_of_current_gene = igdb.GetIndexByName(parsed_vector.front());
+ probs[index_of_current_gene] = std::stod(parsed_vector.back());
+ VERIFY(probs[index_of_current_gene] >= 0 and
+ probs[index_of_current_gene] <= 1);
+ }
+ in.close();
+ return probs;  // indexed by the position GetIndexByName reported for each gene
+}
+
+std::discrete_distribution
+CustomGeneChooser::GetDistr(const std::string& filename,  // builds a discrete distribution from the values read out of `filename`
+ const germline_utils::CustomGeneDatabase& db)
+{
+ std::vector probs { ReadProbabilities(filename, db) };
+ return { probs.begin(), probs.end() };  // weights are passed in the index order produced by ReadProbabilities
+}
+
+CustomGeneChooser::CustomGeneChooser(const std::vector& db,
+ const std::string& v_genes_probs,
+ const std::string& d_genes_probs,
+ const std::string& j_genes_probs):
+ AbstractVDJGeneChooser(db),
+ v_distr(GetDistr(v_genes_probs, db.front())),  // V weights are read against the first database
+ d_distr(),  // default-constructed; replaced below only for three-element input
+ j_distr(GetDistr(j_genes_probs, db.back()))  // J weights are read against the last database
+{
+ if (db.size() == 3) {  // d_genes_probs is opened only when a D database is present
+ d_distr = GetDistr(d_genes_probs, db[1]);
+ }
+}
+
+CustomGeneChooser::CustomGeneChooser(const std::vector& db,
+ const GeneChooserParams::CustomGeneChooserParams& config):
+ CustomGeneChooser(db, config.v_genes_probs, config.d_genes_probs, config.j_genes_probs)  // delegates to the constructor taking the three file names
+{ }
+
+VDJ_GenesIndexTuple CustomGeneChooser::ChooseGenes() const
+{
+ // Draw order is V, then D (only when is_vdj), then J; size_t(-1) marks a D index that was not drawn.
+ const size_t not_chosen = size_t(-1);
+ VERIFY(v_db_p_ != nullptr);
+ const size_t v_index = v_distr(MTSingleton::GetInstance());
+
+ size_t d_index = not_chosen;
+ if (is_vdj) {
+ VERIFY(d_db_p_ != nullptr);
+ d_index = d_distr(MTSingleton::GetInstance());
+ }
+
+ VERIFY(j_db_p_ != nullptr);
+ const size_t j_index = j_distr(MTSingleton::GetInstance());
+ return VDJ_GenesIndexTuple(v_index, d_index, j_index);
+}
+
+} // End namespace ig_simulator
\ No newline at end of file
diff --git a/src/ig_simulator/base_repertoire/gene_chooser/custom_gene_chooser.hpp b/src/ig_simulator/base_repertoire/gene_chooser/custom_gene_chooser.hpp
new file mode 100644
index 00000000..912e0958
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/gene_chooser/custom_gene_chooser.hpp
@@ -0,0 +1,40 @@
+//
+// Created by Andrew Bzikadze on 4/26/17.
+//
+
+#pragma once
+
+#include
+#include "random_generator.hpp"
+#include "abstract_gene_chooser.hpp"
+#include
+#include
+
+namespace ig_simulator {
+
+class CustomGeneChooser final : public AbstractVDJGeneChooser {  // chooses genes from per-segment distributions loaded from files
+private:
+ mutable std::discrete_distribution v_distr;  // mutable: sampled inside the const ChooseGenes()
+ mutable std::discrete_distribution d_distr;
+ mutable std::discrete_distribution j_distr;
+
+private:
+ static std::vector ReadProbabilities(const std::string& filename,
+ const germline_utils::CustomGeneDatabase& db);
+
+ static std::discrete_distribution GetDistr(const std::string& filename,
+ const germline_utils::CustomGeneDatabase& db);
+
+public:
+ CustomGeneChooser(const std::vector& db,
+ const std::string& v_genes_probs,  // the three strings are file names handed to GetDistr
+ const std::string& d_genes_probs,
+ const std::string& j_genes_probs);
+
+ CustomGeneChooser(const std::vector& db,
+ const GeneChooserParams::CustomGeneChooserParams& config);  // takes the same three file names from config
+
+ VDJ_GenesIndexTuple ChooseGenes() const override;
+};
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/gene_chooser/uniform_gene_chooser.cpp b/src/ig_simulator/base_repertoire/gene_chooser/uniform_gene_chooser.cpp
new file mode 100644
index 00000000..2d4cee84
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/gene_chooser/uniform_gene_chooser.cpp
@@ -0,0 +1,27 @@
+//
+// Created by Andrew Bzikadze on 3/17/17.
+//
+
+#include "simulation_routines.hpp"
+#include "uniform_gene_chooser.hpp"
+
+namespace ig_simulator {
+
+VDJ_GenesIndexTuple UniformVDJGeneChooser::ChooseGenes() const {
+ // random_index is called with bounds 0 and size - 1 per database; the D index stays size_t(-1) unless is_vdj is set.
+ VERIFY(v_db_p_ != nullptr);
+ const size_t v_index = random_index(0, v_db_p_->size() - 1);
+
+ size_t d_index = size_t(-1);
+ if (is_vdj) {
+ VERIFY(d_db_p_ != nullptr);
+ d_index = random_index(0, d_db_p_->size() - 1);
+ }
+
+ VERIFY(j_db_p_ != nullptr);
+ const size_t j_index = random_index(0, j_db_p_->size() - 1);
+
+ return VDJ_GenesIndexTuple(v_index, d_index, j_index);
+}
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/gene_chooser/uniform_gene_chooser.hpp b/src/ig_simulator/base_repertoire/gene_chooser/uniform_gene_chooser.hpp
new file mode 100644
index 00000000..188df559
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/gene_chooser/uniform_gene_chooser.hpp
@@ -0,0 +1,22 @@
+//
+// Created by Andrew Bzikadze on 3/17/17.
+//
+
+#pragma once
+
+#include
+
+#include "abstract_gene_chooser.hpp"
+
+namespace ig_simulator {
+
+class UniformVDJGeneChooser : public AbstractVDJGeneChooser {  // picks each gene index through random_index (see the .cpp)
+public:
+ explicit UniformVDJGeneChooser(const std::vector& db):  // adds no state of its own; db is handled by the base class
+ AbstractVDJGeneChooser(db)
+ { }
+
+ VDJ_GenesIndexTuple ChooseGenes() const override;
+};
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/metaroot/metaroot.cpp b/src/ig_simulator/base_repertoire/metaroot/metaroot.cpp
new file mode 100644
index 00000000..6d411a0f
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/metaroot/metaroot.cpp
@@ -0,0 +1,188 @@
+//
+// Created by Andrew Bzikadze on 3/22/17.
+//
+
+#include "metaroot.hpp"
+#include "convert.hpp"
+#include
+#include "seqan/sequence.h"
+
+namespace ig_simulator {
+
+// Defined without `inline`: the header only declares this constructor, so the definition must be linkable from other translation units.
+AbstractMetaroot::AbstractMetaroot(const germline_utils::CustomGeneDatabase *v_db_p,
+ const germline_utils::CustomGeneDatabase *j_db_p,
+ const size_t v_ind,
+ const size_t j_ind,
+ const annotation_utils::CDRLabeling& cdr_labeling,
+ int cleavage_v,
+ int cleavage_j) :
+ v_db_p(check_pointer(v_db_p)),  // check_pointer presumably rejects nullptr; verify against ig_simulator_utils.hpp
+ j_db_p(check_pointer(j_db_p)),
+ v_ind(v_ind),
+ j_ind(j_ind),
+ cleavage_v(cleavage_v),
+ cleavage_j(cleavage_j),
+ cdr_labeling(cdr_labeling)
+{
+ VERIFY(v_ind < v_db_p->size());  // inside the body these names refer to the parameters, which shadow the members
+ VERIFY(j_ind < j_db_p->size());
+}
+
+void AbstractMetaroot::PrepareGene(seqan::Dna5String& gene, int left_cleavage, int right_cleavage) {  // edits `gene` in place, left end first
+ VERIFY(static_cast(abs(left_cleavage)) <= seqan::length(gene));
+ if (left_cleavage > 0) {
+ gene = seqan::suffix(gene, left_cleavage);  // positive: the first left_cleavage letters are removed
+ } else if (left_cleavage < 0) {
+ seqan::Dna5String pal = seqan::prefix(gene, -left_cleavage);  // negative: palindrome from the first -left_cleavage letters
+ seqan::reverseComplement(pal);
+ pal += gene;  // the reverse complement is placed in front of the gene
+ gene = pal;
+ }
+ VERIFY(static_cast(abs(right_cleavage)) <= seqan::length(gene));  // checked against the gene as modified above
+ if (right_cleavage > 0) {
+ gene = seqan::prefix(gene, seqan::length(gene) - right_cleavage);  // positive: the last right_cleavage letters are removed
+ } else if (right_cleavage < 0) {
+ seqan::Dna5String pal = seqan::suffix(gene, seqan::length(gene) + right_cleavage);  // negative: palindrome from the last -right_cleavage letters
+ seqan::reverseComplement(pal);
+ gene += pal;  // the reverse complement is appended after the gene
+ }
+}
+
+VJMetaroot::VJMetaroot(const germline_utils::CustomGeneDatabase *v_db_p,
+ const germline_utils::CustomGeneDatabase *j_db_p,
+ const size_t v_ind,
+ const size_t j_ind,
+ const annotation_utils::CDRLabeling &cdr_labeling,
+ int cleavage_v,
+ int cleavage_j,
+ seqan::Dna5String insertion_vj) :
+ AbstractMetaroot(v_db_p, j_db_p, v_ind, j_ind, cdr_labeling, cleavage_v, cleavage_j),  // the base class validates pointers and indices
+ insertion_vj(insertion_vj)
+{
+ CalculateSequence();  // `sequence` is assembled once, at construction
+}
+
+const std::string& VJMetaroot::Sequence() const { return sequence; }  // returns the string built by CalculateSequence()
+
+void VJMetaroot::CalculateSequence() {  // sequence = processed V + VJ insertion + processed J
+ VERIFY(v_db_p != nullptr);
+ VERIFY(j_db_p != nullptr);
+
+ seqan::Dna5String v_gene = (*v_db_p)[v_ind].seq();  // local copies: the database entries are not modified
+ seqan::Dna5String j_gene = (*j_db_p)[j_ind].seq();
+
+ PrepareGene(v_gene, 0, cleavage_v);  // V is changed only at its right end
+ PrepareGene(j_gene, cleavage_j, 0);  // J is changed only at its left end
+
+ sequence = core::seqan_string_to_string(v_gene);
+ sequence += core::seqan_string_to_string(insertion_vj);
+ sequence += core::seqan_string_to_string(j_gene);
+}
+
+VDJMetaroot::VDJMetaroot(const germline_utils::CustomGeneDatabase *v_db_p,
+ const germline_utils::CustomGeneDatabase *d_db_p,
+ const germline_utils::CustomGeneDatabase *j_db_p,
+ const size_t v_ind,
+ const size_t d_ind,
+ const size_t j_ind,
+ const annotation_utils::CDRLabeling& cdr_labeling,
+ int cleavage_v,
+ int cleavage_d_left,
+ int cleavage_d_right,
+ int cleavage_j,
+ const seqan::Dna5String& insertion_vd,
+ const seqan::Dna5String& insertion_dj) :
+ AbstractMetaroot(v_db_p, j_db_p, v_ind, j_ind, cdr_labeling, cleavage_v, cleavage_j),  // V/J state is stored and checked by the base class
+ d_db_p(check_pointer(d_db_p)),
+ d_ind(d_ind),
+ cleavage_d_left(cleavage_d_left),
+ cleavage_d_right(cleavage_d_right),
+ insertion_vd(insertion_vd),
+ insertion_dj(insertion_dj)
+{
+ VERIFY(d_ind < d_db_p->size());  // the D index gets the same bounds check the base class applies to V and J
+ CalculateSequence();  // `sequence` is assembled once, at construction
+}
+
+const std::string& VDJMetaroot::Sequence() const { return sequence; }  // returns the string built by CalculateSequence()
+
+void VDJMetaroot::CalculateSequence() {  // sequence = processed V + VD insertion + processed D + DJ insertion + processed J
+ VERIFY(v_db_p != nullptr);
+ VERIFY(d_db_p != nullptr);
+ VERIFY(j_db_p != nullptr);
+
+ seqan::Dna5String v_gene = (*v_db_p)[v_ind].seq();  // local copies: the database entries are not modified
+ seqan::Dna5String d_gene = (*d_db_p)[d_ind].seq();
+ seqan::Dna5String j_gene = (*j_db_p)[j_ind].seq();
+
+ PrepareGene(v_gene, 0, cleavage_v);  // V is changed only at its right end
+ PrepareGene(d_gene, cleavage_d_left, cleavage_d_right);  // D is changed at both ends
+ PrepareGene(j_gene, cleavage_j, 0);  // J is changed only at its left end
+
+ sequence = core::seqan_string_to_string(v_gene);
+ sequence += core::seqan_string_to_string(insertion_vd);
+ sequence += core::seqan_string_to_string(d_gene);
+ sequence += core::seqan_string_to_string(insertion_dj);
+ sequence += core::seqan_string_to_string(j_gene);
+}
+
+void VJMetaroot::print(std::ostream& out) const {  // text dump: gene names, germline genes, cleavages, insertion, CDR positions and CDR strings
+ out << "VJMetaroot:\n\n" <<
+
+ "V gene name: " << (*V_DB_P())[V_Ind()].name() << "\n" <<
+ "J gene name: " << (*J_DB_P())[J_Ind()].name() << "\n\n" <<
+
+ "V gene: " << (*V_DB_P())[V_Ind()].seq() << "\n" <<  // germline sequences as stored in the database, before cleavage
+ "J gene: " << (*J_DB_P())[J_Ind()].seq() << "\n\n" <<
+
+ "Cleavage in V gene: " << CleavageV() << "\n" <<
+ "Cleavage in J gene: " << CleavageJ() << "\n\n" <<
+
+ "Insertion in VJ junction: " << InsertionVJ() << "\n\n" <<
+
+ "CDR1 positions: " << CDRLabeling().cdr1.start_pos << " " << CDRLabeling().cdr1.end_pos << "\n" <<
+ "CDR2 positions: " << CDRLabeling().cdr2.start_pos << " " << CDRLabeling().cdr2.end_pos << "\n" <<
+ "CDR3 positions: " << CDRLabeling().cdr3.start_pos << " " << CDRLabeling().cdr3.end_pos << "\n\n" <<
+
+ "CDR1: " << sequence.substr(CDRLabeling().cdr1.start_pos,  // substr(start, length): length counts both end positions
+ CDRLabeling().cdr1.end_pos - CDRLabeling().cdr1.start_pos + 1) << "\n" <<
+ "CDR2: " << sequence.substr(CDRLabeling().cdr2.start_pos,
+ CDRLabeling().cdr2.end_pos - CDRLabeling().cdr2.start_pos + 1) << "\n" <<
+ "CDR3: " << sequence.substr(CDRLabeling().cdr3.start_pos,
+ CDRLabeling().cdr3.end_pos - CDRLabeling().cdr3.start_pos + 1) << "\n";
+}
+
+void VDJMetaroot::print(std::ostream& out) const {  // text dump: gene names, germline genes, cleavages, insertions, CDR positions and CDR strings
+ out << "VDJMetaroot:\n\n" <<
+
+ "V gene name: " << (*V_DB_P())[V_Ind()].name() << "\n" <<
+ "D gene name: " << (*D_DB_P())[D_Ind()].name() << "\n" <<
+ "J gene name: " << (*J_DB_P())[J_Ind()].name() << "\n\n" <<
+
+ "V gene: " << (*V_DB_P())[V_Ind()].seq() << "\n" <<  // germline sequences as stored in the database, before cleavage
+ "D gene: " << (*D_DB_P())[D_Ind()].seq() << "\n" <<
+ "J gene: " << (*J_DB_P())[J_Ind()].seq() << "\n\n" <<
+
+ "Cleavage in V gene: " << CleavageV() << "\n" <<
+ "Cleavage in D gene (left): " << CleavageDLeft() << "\n" <<
+ "Cleavage in D gene (right): " << CleavageDRight() << "\n" <<
+ "Cleavage in J gene: " << CleavageJ() << "\n\n" <<
+
+ "Insertion in VD junction: " << InsertionVD() << "\n" <<
+ "Insertion in DJ junction: " << InsertionDJ() << "\n\n" <<
+
+ "CDR1 positions: " << CDRLabeling().cdr1.start_pos << " " << CDRLabeling().cdr1.end_pos << "\n" <<
+ "CDR2 positions: " << CDRLabeling().cdr2.start_pos << " " << CDRLabeling().cdr2.end_pos << "\n" <<
+ "CDR3 positions: " << CDRLabeling().cdr3.start_pos << " " << CDRLabeling().cdr3.end_pos << "\n\n" <<
+
+
+ "CDR1: " << sequence.substr(CDRLabeling().cdr1.start_pos,  // substr(start, length): length counts both end positions
+ CDRLabeling().cdr1.end_pos - CDRLabeling().cdr1.start_pos + 1) << "\n" <<
+ "CDR2: " << sequence.substr(CDRLabeling().cdr2.start_pos,
+ CDRLabeling().cdr2.end_pos - CDRLabeling().cdr2.start_pos + 1) << "\n" <<
+ "CDR3: " << sequence.substr(CDRLabeling().cdr3.start_pos,
+ CDRLabeling().cdr3.end_pos - CDRLabeling().cdr3.start_pos + 1) << "\n";
+}
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/metaroot/metaroot.hpp b/src/ig_simulator/base_repertoire/metaroot/metaroot.hpp
new file mode 100644
index 00000000..a8612c73
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/metaroot/metaroot.hpp
@@ -0,0 +1,148 @@
+//
+// Created by Andrew Bzikadze on 3/22/17.
+//
+
+#pragma once
+
+#include "gtest/gtest_prod.h"
+#include "germline_utils/germline_db_generator.hpp"
+#include "annotation_utils/cdr_labeling_primitives.hpp"
+#include "ig_simulator_utils.hpp"
+
+namespace ig_simulator {
+
+class AbstractMetaroot {  // state shared by VJMetaroot and VDJMetaroot: V/J genes, cleavages, CDR labeling, sequence
+ friend class IgSimulatorTest;
+ FRIEND_TEST(IgSimulatorTest, PrepareGeneTest);  // gives the named test access to the protected PrepareGene
+
+protected:
+ const germline_utils::CustomGeneDatabase * v_db_p;
+ const germline_utils::CustomGeneDatabase * j_db_p;
+
+ const size_t v_ind;  // index of the V gene inside *v_db_p
+ const size_t j_ind;  // index of the J gene inside *j_db_p
+
+ // Negative cleavage means palindrome
+ const int cleavage_v;
+ const int cleavage_j;
+
+ const annotation_utils::CDRLabeling cdr_labeling;
+
+ std::string sequence;  // filled by CalculateSequence() in the derived classes
+
+ bool productive = true;  // cleared only through SetNonProductive()
+
+protected:
+ static void PrepareGene(seqan::Dna5String& gene, int left_cleavage, int right_cleavage);
+ virtual void CalculateSequence() = 0;
+ virtual void print(std::ostream& out) const = 0;  // backs operator<< below
+
+public:
+ AbstractMetaroot(const germline_utils::CustomGeneDatabase *v_db_p,
+ const germline_utils::CustomGeneDatabase *j_db_p,
+ const size_t v_ind,
+ const size_t j_ind,
+ const annotation_utils::CDRLabeling& cdr_labeling,
+ int cleavage_v,
+ int cleavage_j);
+
+ const germline_utils::CustomGeneDatabase *V_DB_P() const { return v_db_p; }
+ const germline_utils::CustomGeneDatabase *J_DB_P() const { return j_db_p; }
+
+ size_t V_Ind() const { return v_ind; }
+ size_t J_Ind() const { return j_ind; }
+ int CleavageV() const { return cleavage_v; }
+ int CleavageJ() const { return cleavage_j; }
+
+ const annotation_utils::CDRLabeling CDRLabeling() const { return cdr_labeling; }  // returns by value, so every call copies the labeling
+
+ size_t Length() const { return sequence.size(); }
+
+ virtual const std::string& Sequence() const = 0;
+
+ bool IsProductive() const { return productive; }
+ void SetNonProductive() { productive = false; }  // one-way switch: no setter turns the flag back on
+
+ friend std::ostream& operator<<(std::ostream& out, const AbstractMetaroot& root) {
+ root.print(out);  // dispatches to the derived class' print()
+ return out;
+ }
+
+ AbstractMetaroot() = delete;
+ AbstractMetaroot(const AbstractMetaroot&) = default;
+ AbstractMetaroot(AbstractMetaroot&&) = default;
+ AbstractMetaroot& operator=(const AbstractMetaroot&) = delete;
+ AbstractMetaroot& operator=(AbstractMetaroot&&) = delete;
+
+ virtual ~AbstractMetaroot() { }
+};
+
+using AbstractMetarootCPtr = std::unique_ptr;
+
+
+class VJMetaroot final: public AbstractMetaroot {  // metaroot built from a V gene, one insertion and a J gene
+private:
+ const seqan::Dna5String insertion_vj;  // letters placed between the processed V and J genes
+ void CalculateSequence() override;
+ void print(std::ostream&) const override;
+
+public:
+ VJMetaroot(const germline_utils::CustomGeneDatabase *v_db_p,
+ const germline_utils::CustomGeneDatabase *j_db_p,
+ const size_t v_ind,
+ const size_t j_ind,
+ const annotation_utils::CDRLabeling &cdr_labeling,
+ int cleavage_v,
+ int cleavage_j,
+ seqan::Dna5String insertion_vj = "");  // an omitted insertion means an empty one
+
+ const seqan::Dna5String& InsertionVJ() const { return insertion_vj; }
+
+ const std::string& Sequence() const override;
+};
+
+
+class VDJMetaroot final: public AbstractMetaroot {  // metaroot built from V, D and J genes with two insertions
+private:
+ const germline_utils::CustomGeneDatabase * d_db_p;
+
+ const size_t d_ind;  // index of the D gene inside *d_db_p
+
+ // Negative cleavage means palindrome
+ const int cleavage_d_left;
+ const int cleavage_d_right;
+
+ const seqan::Dna5String insertion_vd;  // letters placed between the processed V and D genes
+ const seqan::Dna5String insertion_dj;  // letters placed between the processed D and J genes
+
+private:
+ void print(std::ostream&) const override;
+ void CalculateSequence() override;
+
+public:
+ VDJMetaroot(const germline_utils::CustomGeneDatabase *v_db_p,
+ const germline_utils::CustomGeneDatabase *d_db_p,
+ const germline_utils::CustomGeneDatabase *j_db_p,
+ const size_t v_ind,
+ const size_t d_ind,
+ const size_t j_ind,
+ const annotation_utils::CDRLabeling& cdr_labeling,
+ int cleavage_v,
+ int cleavage_d_left,
+ int cleavage_d_right,
+ int cleavage_j,
+ const seqan::Dna5String& insertion_vd = "",  // omitted insertions mean empty ones
+ const seqan::Dna5String& insertion_dj = "");
+
+ const germline_utils::CustomGeneDatabase *D_DB_P() const { return d_db_p; }
+
+ size_t D_Ind() const { return d_ind; }
+ int CleavageDLeft() const { return cleavage_d_left; }
+ int CleavageDRight() const { return cleavage_d_right; }
+ const seqan::Dna5String& InsertionVD() const { return insertion_vd; }
+ const seqan::Dna5String& InsertionDJ() const { return insertion_dj; }
+
+ const std::string& Sequence() const override;
+};
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/metaroot_cluster/metaroot_cluster.cpp b/src/ig_simulator/base_repertoire/metaroot_cluster/metaroot_cluster.cpp
new file mode 100644
index 00000000..bd839703
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/metaroot_cluster/metaroot_cluster.cpp
@@ -0,0 +1,5 @@
+//
+// Created by Andrew Bzikadze on 4/2/17.
+//
+
+#include "metaroot_cluster.hpp"
diff --git a/src/ig_simulator/base_repertoire/metaroot_cluster/metaroot_cluster.hpp b/src/ig_simulator/base_repertoire/metaroot_cluster/metaroot_cluster.hpp
new file mode 100644
index 00000000..11428809
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/metaroot_cluster/metaroot_cluster.hpp
@@ -0,0 +1,32 @@
+//
+// Created by Andrew Bzikadze on 4/2/17.
+//
+
+#pragma once
+
+#include "base_repertoire/metaroot/metaroot.hpp"
+
+namespace ig_simulator {
+
+class MetarootCluster {  // pairs an owned metaroot with its multiplicity
+private:
+ AbstractMetarootCPtr metaroot_p;
+ size_t multiplicity;
+
+public:
+ MetarootCluster(AbstractMetarootCPtr&& metaroot_p,  // ownership of the metaroot moves into the cluster
+ const size_t multiplicity):
+ metaroot_p(std::move(metaroot_p)),
+ multiplicity(multiplicity)
+ { }
+
+ MetarootCluster(const MetarootCluster&) = delete;
+ MetarootCluster(MetarootCluster&&) = default;  // move construction is the only enabled copy/move operation
+ MetarootCluster& operator=(const MetarootCluster&) = delete;
+ MetarootCluster& operator=(MetarootCluster&&) = delete;
+
+ const AbstractMetarootCPtr& MetarootPtr() const { return metaroot_p; }  // exposes the pointer without giving up ownership
+ size_t Multiplicity() const { return multiplicity; }
+};
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/metaroot_creator/config_based_getter.cpp b/src/ig_simulator/base_repertoire/metaroot_creator/config_based_getter.cpp
new file mode 100644
index 00000000..87b1584f
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/metaroot_creator/config_based_getter.cpp
@@ -0,0 +1,20 @@
+//
+// Created by Andrew Bzikadze on 3/31/17.
+//
+
+#include "config_based_getter.hpp"
+
+
+namespace ig_simulator {
+
+AbstractMetarootCreatorCPtr get_metarootcreator(const germline_utils::ChainType chain_type,
+ const MetarootSimulationParams& config,
+ std::vector& db)
+{
+ if (chain_type.IsVDJ())  // chains reported as VDJ get the creator that also handles a D segment
+ return AbstractMetarootCreatorCPtr(new VDJMetarootCreator(config, db));
+ return AbstractMetarootCreatorCPtr(new VJMetarootCreator(config, db));  // every other chain type gets the VJ creator
+}
+
+} // End namespace ig_simulator
+
diff --git a/src/ig_simulator/base_repertoire/metaroot_creator/config_based_getter.hpp b/src/ig_simulator/base_repertoire/metaroot_creator/config_based_getter.hpp
new file mode 100644
index 00000000..8547ec05
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/metaroot_creator/config_based_getter.hpp
@@ -0,0 +1,15 @@
+//
+// Created by Andrew Bzikadze on 3/31/17.
+//
+
+#pragma once
+
+#include "metaroot_creator.hpp"
+
+namespace ig_simulator {
+
+AbstractMetarootCreatorCPtr get_metarootcreator(const germline_utils::ChainType chain_type,  // factory: VDJ or VJ creator, chosen by chain_type.IsVDJ()
+ const MetarootSimulationParams& config,
+ std::vector& db);
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/metaroot_creator/metaroot_creator.cpp b/src/ig_simulator/base_repertoire/metaroot_creator/metaroot_creator.cpp
new file mode 100644
index 00000000..cfc74229
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/metaroot_creator/metaroot_creator.cpp
@@ -0,0 +1,160 @@
+//
+// Created by Andrew Bzikadze on 3/20/17.
+//
+
+#include "metaroot_creator.hpp"
+
+#include
+
+#include
+#include "random_generator.hpp"
+#include "annotation_utils/cdr_labeling_primitives.hpp"
+
+#include
+
+namespace ig_simulator {
+
+AbstractMetarootCreator::AbstractMetarootCreator(const MetarootSimulationParams& config,
+ std::vector& db,
+ AbstractVDJGeneChooserCPtr&& gene_chooser):
+ v_db_p(&db.front()),  // the V database is the first element of db
+ j_db_p(&db.back()),  // the J database is the last element of db
+ prob_cleavage_v(check_probability(config.cleavage_params.prob_cleavage_v)),
+ prob_cleavage_j(check_probability(config.cleavage_params.prob_cleavage_j)),
+ gene_chooser_p(std::move(gene_chooser)),
+ nucl_remover_p(get_nucleotides_remover(config.nucleotides_remover_params)),
+ nucl_creator_p(get_nucleotides_creator(config.p_nucleotides_creator_params)),
+ nucl_inserter_p(get_nucleotides_inserter(config.n_nucleotides_inserter_params)),
+ v_cdr_db(cdr_labeler::GermlineDbLabeler(db.front(), config.cdr_labeler_config.cdrs_params).ComputeLabeling()),  // CDR labeling is computed once per database
+ j_cdr_db(cdr_labeler::GermlineDbLabeler(db.back(), config.cdr_labeler_config.cdrs_params).ComputeLabeling()),
+ productivity_checker()
+{
+ VERIFY(db.size() >= 2);  // NOTE(review): db.front()/db.back() above are evaluated before this check runs
+ VERIFY(v_db_p->size() > 0);
+ VERIFY(j_db_p->size() > 0);
+}
+
+VJMetarootCreator::VJMetarootCreator(const MetarootSimulationParams& config,
+ std::vector& db):
+ AbstractMetarootCreator(config, db, get_gene_chooser(config.gene_chooser_params, db))  // the gene chooser is built from the same config and db
+{
+ VERIFY(db.size() == 2);  // exactly the V and J databases
+}
+
+AbstractMetarootCPtr VJMetarootCreator::Createroot() const {  // draws genes, cleavages and an insertion, then builds one VJMetaroot
+ auto genes_ind = gene_chooser_p->ChooseGenes();
+ VERIFY(std::get<1>(genes_ind) == size_t(-1));  // no D index may be chosen for a VJ chain
+
+ bool is_cleavage_v = std::bernoulli_distribution(prob_cleavage_v)(MTSingleton::GetInstance());
+ bool is_cleavage_j = std::bernoulli_distribution(prob_cleavage_j)(MTSingleton::GetInstance());
+
+ int cleavage_v = is_cleavage_v ?  // positive value from the remover, negated value from the creator
+ static_cast(nucl_remover_p->RemoveInVGene()) :
+ -static_cast(nucl_creator_p->CreateInVGene());
+
+ int cleavage_j = is_cleavage_j ?
+ static_cast(nucl_remover_p->RemoveInJGene()) :
+ -static_cast(nucl_creator_p->CreateInJGene());
+
+ seqan::Dna5String vj_insertion(nucl_inserter_p->GetVJInsertion());
+
+ const auto& v_gene = (*v_db_p)[std::get<0>(genes_ind)];
+ const auto& j_gene = (*j_db_p)[std::get<2>(genes_ind)];
+
+ annotation_utils::CDRLabeling cdr_labeling(v_cdr_db.GetLabelingByGene(v_gene));  // starts from the V gene labeling
+ annotation_utils::CDRLabeling j_gene_cdr_labeling(j_cdr_db.GetLabelingByGene(j_gene));
+
+ if (not v_cdr_db.CDRLabelingIsEmpty(v_gene) and not j_cdr_db.CDRLabelingIsEmpty(j_gene)) {  // CDR3 end is set only when both genes are labeled
+ long long cdr3_end = static_cast(v_gene.length()) +  // V length - cleavage_v + insertion length - cleavage_j + CDR3 end labeled on J
+ -cleavage_v +
+ static_cast(seqan::length(vj_insertion)) +
+ -cleavage_j +
+ static_cast(j_gene_cdr_labeling.cdr3.end_pos);
+ VERIFY(cdr3_end >= 0);
+ cdr_labeling.cdr3.end_pos = static_cast(cdr3_end);
+ }
+
+ VJMetaroot metaroot { v_db_p, j_db_p,
+ std::get<0>(genes_ind), std::get<2>(genes_ind),
+ cdr_labeling,
+ cleavage_v, cleavage_j,
+ vj_insertion };
+ if (not productivity_checker.IsProductive(metaroot)) {  // the flag has to be set before the heap copy is made below
+ metaroot.SetNonProductive();
+ }
+ return AbstractMetarootCPtr(new VJMetaroot(std::move(metaroot)));
+}
+
+VDJMetarootCreator::VDJMetarootCreator(const MetarootSimulationParams& config,
+ std::vector& db):
+ AbstractMetarootCreator(config, db, get_gene_chooser(config.gene_chooser_params, db)),  // the gene chooser is built from the same config and db
+ d_db_p(&db.at(1)),  // the D database is the middle element of db
+ prob_cleavage_d_left(check_probability(config.cleavage_params.prob_cleavage_d_left)),
+ prob_cleavage_d_right(check_probability(config.cleavage_params.prob_cleavage_d_right))
+{
+ VERIFY(db.size() == 3);  // exactly the V, D and J databases
+ VERIFY(d_db_p->size() > 0);
+}
+
+AbstractMetarootCPtr VDJMetarootCreator::Createroot() const {  // draws genes, cleavages and two insertions, then builds one VDJMetaroot
+ auto genes_ind = gene_chooser_p->ChooseGenes();
+
+ bool is_cleavage_v = std::bernoulli_distribution(prob_cleavage_v)(MTSingleton::GetInstance());
+ bool is_cleavage_d_left = std::bernoulli_distribution(prob_cleavage_d_left)(MTSingleton::GetInstance());
+ bool is_cleavage_d_right = std::bernoulli_distribution(prob_cleavage_d_right)(MTSingleton::GetInstance());
+ bool is_cleavage_j = std::bernoulli_distribution(prob_cleavage_j)(MTSingleton::GetInstance());
+
+ int cleavage_v = is_cleavage_v ?  // positive value from the remover, negated value from the creator
+ static_cast(nucl_remover_p->RemoveInVGene()) :
+ -static_cast(nucl_creator_p->CreateInVGene());
+
+ int cleavage_d_left = is_cleavage_d_left ?
+ static_cast(nucl_remover_p->RemoveInDGeneLeft()) :
+ -static_cast(nucl_creator_p->CreateInDGeneLeft());
+
+ int cleavage_d_right = is_cleavage_d_right ?
+ static_cast(nucl_remover_p->RemoveInDGeneRight()) :
+ -static_cast(nucl_creator_p->CreateInDGeneRight());
+
+ int cleavage_j = is_cleavage_j ?
+ static_cast(nucl_remover_p->RemoveInJGene()) :
+ -static_cast(nucl_creator_p->CreateInJGene());
+
+ seqan::Dna5String vd_insertion(nucl_inserter_p->GetVDInsertion());
+ seqan::Dna5String dj_insertion(nucl_inserter_p->GetDJInsertion());
+
+ const auto& v_gene = (*v_db_p)[std::get<0>(genes_ind)];
+ const auto& d_gene = (*d_db_p)[std::get<1>(genes_ind)];
+ const auto& j_gene = (*j_db_p)[std::get<2>(genes_ind)];
+
+ annotation_utils::CDRLabeling cdr_labeling(v_cdr_db.GetLabelingByGene(v_gene));  // starts from the V gene labeling
+ annotation_utils::CDRLabeling j_gene_cdr_labeling(j_cdr_db.GetLabelingByGene(j_gene));
+
+ if (not v_cdr_db.CDRLabelingIsEmpty(v_gene) and not j_cdr_db.CDRLabelingIsEmpty(j_gene)) {  // CDR3 end is set only when both genes are labeled
+ long long cdr3_end = static_cast(v_gene.length()) +  // gene and insertion lengths are added, every cleavage is subtracted
+ -cleavage_v +
+ static_cast(seqan::length(vd_insertion)) +
+ -cleavage_d_left +
+ static_cast(d_gene.length()) +
+ -cleavage_d_right +
+ static_cast(seqan::length(dj_insertion)) +
+ -cleavage_j +
+ static_cast(j_gene_cdr_labeling.cdr3.end_pos);  // the last term is the CDR3 end labeled on the J gene
+
+ VERIFY(cdr3_end >= 0);
+ cdr_labeling.cdr3.end_pos = static_cast(cdr3_end);
+ }
+
+ VDJMetaroot metaroot { v_db_p, d_db_p, j_db_p,
+ std::get<0>(genes_ind), std::get<1>(genes_ind), std::get<2>(genes_ind),
+ cdr_labeling,
+ cleavage_v, cleavage_d_left, cleavage_d_right, cleavage_j,
+ vd_insertion, dj_insertion };
+
+ if (not productivity_checker.IsProductive(metaroot)) {  // the flag has to be set before the heap copy is made below
+ metaroot.SetNonProductive();
+ }
+ return AbstractMetarootCPtr(new VDJMetaroot(std::move(metaroot)));
+}
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/metaroot_creator/metaroot_creator.hpp b/src/ig_simulator/base_repertoire/metaroot_creator/metaroot_creator.hpp
new file mode 100644
index 00000000..4867b3bb
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/metaroot_creator/metaroot_creator.hpp
@@ -0,0 +1,87 @@
+//
+// Created by Andrew Bzikadze on 3/20/17.
+//
+
+#pragma once
+
+#include "germline_utils/chain_type.hpp"
+#include "base_repertoire/gene_chooser/abstract_gene_chooser.hpp"
+#include "base_repertoire/gene_chooser/config_based_getter.hpp"
+#include "base_repertoire/nucleotides_remover/abstract_nucleotides_remover.hpp"
+#include "base_repertoire/nucleotides_remover/config_based_getter.hpp"
+#include "base_repertoire/p_nucleotides_creator/abstract_nucleotides_creator.hpp"
+#include "base_repertoire/p_nucleotides_creator/config_based_getter.hpp"
+#include "base_repertoire/n_nucleotides_inserter/abstract_n_nucleotides_inserter.hpp"
+#include "base_repertoire/n_nucleotides_inserter/config_based_getter.hpp"
+#include "base_repertoire/metaroot/metaroot.hpp"
+#include "germline_db_labeler.hpp"
+#include "germline_db_labeling.hpp"
+#include "cdr_config.hpp"
+#include "base_repertoire/productivity_checker/productivity_checker.hpp"
+
+namespace ig_simulator {
+
+class AbstractMetarootCreator { // Base of the metaroot creators: holds the state shared by the VJ and VDJ variants.
+protected:
+    // TODO: make the databases `const`.
+    // They are kept non-const because cdr_labeler needs mutable access: see germline_db_labeler.hpp,
+    // where `DbCDRLabeling GermlineDbLabeler::ComputeLabeling();`
+    // is not declared const.
+    germline_utils::CustomGeneDatabase * v_db_p;
+    germline_utils::CustomGeneDatabase * j_db_p;
+
+    const double prob_cleavage_v;
+    const double prob_cleavage_j;
+
+    const AbstractVDJGeneChooserCPtr gene_chooser_p;
+    const AbstractNucleotidesRemoverCPtr nucl_remover_p;
+    const AbstractPNucleotidesCreatorCPtr nucl_creator_p;
+    const AbstractNNucleotidesInserterCPtr nucl_inserter_p;
+
+    const cdr_labeler::DbCDRLabeling v_cdr_db;
+    const cdr_labeler::DbCDRLabeling j_cdr_db;
+
+    const ProductivityChecker productivity_checker;
+
+public:
+    AbstractMetarootCreator(const MetarootSimulationParams& config,
+                            std::vector& db,
+                            AbstractVDJGeneChooserCPtr&& gene_chooser);
+
+    AbstractMetarootCreator() = delete;
+    AbstractMetarootCreator(const AbstractMetarootCreator&) = delete;
+    AbstractMetarootCreator(AbstractMetarootCreator&&) = delete;
+    AbstractMetarootCreator& operator=(const AbstractMetarootCreator&) = delete;
+    AbstractMetarootCreator& operator=(AbstractMetarootCreator&&) = delete; // move assignment (takes an rvalue reference)
+
+    virtual AbstractMetarootCPtr Createroot() const = 0; // implemented by VJMetarootCreator / VDJMetarootCreator
+    virtual ~AbstractMetarootCreator() { }
+};
+using AbstractMetarootCreatorCPtr = std::unique_ptr;
+
+
+class VJMetarootCreator final : public AbstractMetarootCreator {
+public:
+
+ VJMetarootCreator(const MetarootSimulationParams& config,
+ std::vector& db);
+
+ AbstractMetarootCPtr Createroot() const override;
+};
+
+
+class VDJMetarootCreator final : public AbstractMetarootCreator {
+private:
+ germline_utils::CustomGeneDatabase * d_db_p;
+
+ const double prob_cleavage_d_left;
+ const double prob_cleavage_d_right;
+
+public:
+ VDJMetarootCreator(const MetarootSimulationParams& config,
+ std::vector& db);
+
+ AbstractMetarootCPtr Createroot() const override;
+};
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/multiplicity_creator/multiplicity_creator.cpp b/src/ig_simulator/base_repertoire/multiplicity_creator/multiplicity_creator.cpp
new file mode 100644
index 00000000..9c4db30d
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/multiplicity_creator/multiplicity_creator.cpp
@@ -0,0 +1,16 @@
+//
+// Created by Andrew Bzikadze on 3/27/17.
+//
+
+#include "multiplicity_creator.hpp"
+
+namespace ig_simulator {
+
+AbstractMultiplicityCreatorPtr get_multiplicity_creator(const MultiplicityCreatorParams &config) {
+    if (config.method == MultiplicityCreatorMethod::Geometric)  // the only method handled by this factory
+        return AbstractMultiplicityCreatorPtr(new GeometricMultiplicityCreator(config.geometric_params));
+    VERIFY(false);   // any other method is rejected
+    return nullptr;  // all paths must return: flowing off the end is UB should VERIFY not abort
+}
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/multiplicity_creator/multiplicity_creator.hpp b/src/ig_simulator/base_repertoire/multiplicity_creator/multiplicity_creator.hpp
new file mode 100644
index 00000000..da969e0a
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/multiplicity_creator/multiplicity_creator.hpp
@@ -0,0 +1,55 @@
+//
+// Created by Andrew Bzikadze on 3/27/17.
+//
+
+#pragma once
+
+#include
+#include
+
+#include "verify.hpp"
+
+#include "ig_simulator_utils.hpp"
+#include "random_generator.hpp"
+#include "ig_simulator_config.hpp"
+
+namespace ig_simulator {
+
+class AbstractMultiplicityCreator { // Non-copyable, non-movable interface for multiplicity generators.
+public:
+    AbstractMultiplicityCreator() = default;
+    AbstractMultiplicityCreator(const AbstractMultiplicityCreator&) = delete;
+    AbstractMultiplicityCreator(AbstractMultiplicityCreator&&) = delete;
+    AbstractMultiplicityCreator& operator=(const AbstractMultiplicityCreator&) = delete;
+    AbstractMultiplicityCreator& operator=(AbstractMultiplicityCreator&&) = delete;
+    virtual ~AbstractMultiplicityCreator() { } // required: derived objects are destroyed through a base pointer
+    virtual size_t RandomMultiplicity() = 0;   // returns one multiplicity value; non-const, so it may change state
+};
+
+using AbstractMultiplicityCreatorPtr = std::unique_ptr;
+
+class GeometricMultiplicityCreator final : public AbstractMultiplicityCreator {
+private:
+    double lambda;                             // parameter passed to `distribution` (its success probability)
+    std::geometric_distribution distribution;
+
+public:
+    GeometricMultiplicityCreator(double lambda):
+        lambda(lambda),
+        distribution(check_numeric_positive(lambda))
+    { }
+
+    GeometricMultiplicityCreator(const MultiplicityCreatorParams::GeometricParams &config):
+        GeometricMultiplicityCreator(config.lambda)
+    { }
+
+    size_t RandomMultiplicity() override {     // geometric draw shifted by one, so the result is never 0
+        return distribution(MTSingleton::GetInstance()) + 1;
+    }
+    // std::geometric_distribution(p) has mean (1 - p) / p; the `+ 1` shift above makes the mean 1 / p.
+    double Mean() const { return 1. / lambda; }
+};
+
+AbstractMultiplicityCreatorPtr get_multiplicity_creator(const MultiplicityCreatorParams &config);
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/n_nucleotides_inserter/abstract_n_nucleotides_inserter.cpp b/src/ig_simulator/base_repertoire/n_nucleotides_inserter/abstract_n_nucleotides_inserter.cpp
new file mode 100644
index 00000000..5f4bbbbe
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/n_nucleotides_inserter/abstract_n_nucleotides_inserter.cpp
@@ -0,0 +1,5 @@
+//
+// Created by Andrew Bzikadze on 3/20/17.
+//
+
+#include "abstract_n_nucleotides_inserter.hpp"
diff --git a/src/ig_simulator/base_repertoire/n_nucleotides_inserter/abstract_n_nucleotides_inserter.hpp b/src/ig_simulator/base_repertoire/n_nucleotides_inserter/abstract_n_nucleotides_inserter.hpp
new file mode 100644
index 00000000..7221962b
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/n_nucleotides_inserter/abstract_n_nucleotides_inserter.hpp
@@ -0,0 +1,30 @@
+//
+// Created by Andrew Bzikadze on 3/20/17.
+//
+
+#pragma once
+
+#include
+#include
+#include
+
+namespace ig_simulator {
+
+class AbstractNNucleotidesInserter {
+public:
+ virtual seqan::Dna5String GetVJInsertion() const = 0;
+ virtual seqan::Dna5String GetVDInsertion() const = 0;
+ virtual seqan::Dna5String GetDJInsertion() const = 0;
+
+ AbstractNNucleotidesInserter() = default;
+ AbstractNNucleotidesInserter(const AbstractNNucleotidesInserter&) = delete;
+ AbstractNNucleotidesInserter(AbstractNNucleotidesInserter&&) = delete;
+ AbstractNNucleotidesInserter& operator=(const AbstractNNucleotidesInserter&) = delete;
+ AbstractNNucleotidesInserter& operator=(AbstractNNucleotidesInserter&&) = delete;
+
+ virtual ~AbstractNNucleotidesInserter() { }
+};
+
+using AbstractNNucleotidesInserterCPtr = std::unique_ptr;
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/n_nucleotides_inserter/config_based_getter.cpp b/src/ig_simulator/base_repertoire/n_nucleotides_inserter/config_based_getter.cpp
new file mode 100644
index 00000000..70aa1e70
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/n_nucleotides_inserter/config_based_getter.cpp
@@ -0,0 +1,20 @@
+//
+// Created by Andrew Bzikadze on 3/31/17.
+//
+
+#include "config_based_getter.hpp"
+#include "abstract_n_nucleotides_inserter.hpp"
+#include "uniform_n_nucleotides_inserter.hpp"
+
+
+namespace ig_simulator {
+
+AbstractNNucleotidesInserterCPtr get_nucleotides_inserter(const NNucleotidesInserterParams & config) {
+    if (config.method == NNucleotidesInserterMethod::Uniform)  // the only method handled by this factory
+        return AbstractNNucleotidesInserterCPtr(new UniformNNucleotidesInserter(config.uniform_inserter_params));
+    VERIFY(false);   // any other method is rejected
+    return nullptr;  // all paths must return: flowing off the end is UB should VERIFY not abort
+}
+
+} // End namespace ig_simulator
+
diff --git a/src/ig_simulator/base_repertoire/n_nucleotides_inserter/config_based_getter.hpp b/src/ig_simulator/base_repertoire/n_nucleotides_inserter/config_based_getter.hpp
new file mode 100644
index 00000000..b24fbab6
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/n_nucleotides_inserter/config_based_getter.hpp
@@ -0,0 +1,14 @@
+//
+// Created by Andrew Bzikadze on 3/31/17.
+//
+
+#pragma once
+
+#include "abstract_n_nucleotides_inserter.hpp"
+#include "ig_simulator_config.hpp"
+
+namespace ig_simulator {
+
+AbstractNNucleotidesInserterCPtr get_nucleotides_inserter(const NNucleotidesInserterParams & config);
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/n_nucleotides_inserter/uniform_n_nucleotides_inserter.cpp b/src/ig_simulator/base_repertoire/n_nucleotides_inserter/uniform_n_nucleotides_inserter.cpp
new file mode 100644
index 00000000..7a2fa60c
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/n_nucleotides_inserter/uniform_n_nucleotides_inserter.cpp
@@ -0,0 +1,29 @@
+//
+// Created by Andrew Bzikadze on 3/20/17.
+//
+
+#include
+#include "uniform_n_nucleotides_inserter.hpp"
+#include "simulation_routines.hpp"
+
+using seqan::Dna5String;
+
+namespace ig_simulator {
+
+Dna5String UniformNNucleotidesInserter::RandDna5Str(size_t size) const {
+    // Each position gets one character of this alphabet, selected by random_index(0, 3).
+    const char* const alphabet = "ACGT";
+
+    // One random_index call per position, in order; the text is then converted to Dna5String.
+    std::string nucls(size, alphabet[0]);
+    for (auto & nucl : nucls) {
+        nucl = alphabet[random_index(0, 3)];
+    }
+    return Dna5String(nucls);
+}
+
+Dna5String UniformNNucleotidesInserter::GetVJInsertion() const { return RandDna5Str(random_index(0, max_vj_insertion)); }
+Dna5String UniformNNucleotidesInserter::GetVDInsertion() const { return RandDna5Str(random_index(0, max_vd_insertion)); }
+Dna5String UniformNNucleotidesInserter::GetDJInsertion() const { return RandDna5Str(random_index(0, max_dj_insertion)); }
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/n_nucleotides_inserter/uniform_n_nucleotides_inserter.hpp b/src/ig_simulator/base_repertoire/n_nucleotides_inserter/uniform_n_nucleotides_inserter.hpp
new file mode 100644
index 00000000..74620a98
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/n_nucleotides_inserter/uniform_n_nucleotides_inserter.hpp
@@ -0,0 +1,33 @@
+//
+// Created by Andrew Bzikadze on 3/20/17.
+//
+
+#pragma once
+
+#include "abstract_n_nucleotides_inserter.hpp"
+#include "ig_simulator_config.hpp"
+
+namespace ig_simulator {
+
+class UniformNNucleotidesInserter final : public AbstractNNucleotidesInserter {
+private:
+ const size_t max_vj_insertion;
+ const size_t max_vd_insertion;
+ const size_t max_dj_insertion;
+
+ seqan::Dna5String RandDna5Str(size_t size) const;
+
+public:
+ explicit UniformNNucleotidesInserter(
+ const NNucleotidesInserterParams::UniformInserterParams config):
+ max_vj_insertion(config.max_vj_insertion),
+ max_vd_insertion(config.max_vd_insertion),
+ max_dj_insertion(config.max_dj_insertion)
+ { }
+
+ seqan::Dna5String GetVJInsertion() const override;
+ seqan::Dna5String GetVDInsertion() const override;
+ seqan::Dna5String GetDJInsertion() const override;
+};
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/nucleotides_remover/abstract_nucleotides_remover.cpp b/src/ig_simulator/base_repertoire/nucleotides_remover/abstract_nucleotides_remover.cpp
new file mode 100644
index 00000000..0a3196cc
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/nucleotides_remover/abstract_nucleotides_remover.cpp
@@ -0,0 +1,5 @@
+//
+// Created by Andrew Bzikadze on 3/17/17.
+//
+
+#include "abstract_nucleotides_remover.hpp"
diff --git a/src/ig_simulator/base_repertoire/nucleotides_remover/abstract_nucleotides_remover.hpp b/src/ig_simulator/base_repertoire/nucleotides_remover/abstract_nucleotides_remover.hpp
new file mode 100644
index 00000000..d7db5921
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/nucleotides_remover/abstract_nucleotides_remover.hpp
@@ -0,0 +1,30 @@
+//
+// Created by Andrew Bzikadze on 3/17/17.
+//
+
+#pragma once
+
+#include
+#include
+
+namespace ig_simulator {
+
+class AbstractNucleotidesRemover {
+public:
+ virtual size_t RemoveInVGene() const = 0;
+ virtual size_t RemoveInDGeneLeft() const = 0;
+ virtual size_t RemoveInDGeneRight() const = 0;
+ virtual size_t RemoveInJGene() const = 0;
+
+ AbstractNucleotidesRemover() = default;
+ AbstractNucleotidesRemover(const AbstractNucleotidesRemover&) = delete;
+ AbstractNucleotidesRemover(AbstractNucleotidesRemover&&) = delete;
+ AbstractNucleotidesRemover& operator=(const AbstractNucleotidesRemover&) = delete;
+ AbstractNucleotidesRemover& operator=(AbstractNucleotidesRemover&&) = delete;
+
+ virtual ~AbstractNucleotidesRemover() { }
+};
+
+using AbstractNucleotidesRemoverCPtr = std::unique_ptr;
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/nucleotides_remover/config_based_getter.cpp b/src/ig_simulator/base_repertoire/nucleotides_remover/config_based_getter.cpp
new file mode 100644
index 00000000..670de9bd
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/nucleotides_remover/config_based_getter.cpp
@@ -0,0 +1,19 @@
+//
+// Created by Andrew Bzikadze on 3/31/17.
+//
+
+#include "config_based_getter.hpp"
+#include "uniform_nucleotides_remover.hpp"
+
+
+namespace ig_simulator {
+
+AbstractNucleotidesRemoverCPtr get_nucleotides_remover(const NucleotidesRemoverParams & config) {
+    if (config.method == NucleotidesRemoverMethod::Uniform)  // the only method handled by this factory
+        return AbstractNucleotidesRemoverCPtr(new UniformNucleotidesRemover(config.uniform_remover_params));
+    VERIFY(false);   // any other method is rejected
+    return nullptr;  // all paths must return: flowing off the end is UB should VERIFY not abort
+}
+
+} // End namespace ig_simulator
+
diff --git a/src/ig_simulator/base_repertoire/nucleotides_remover/config_based_getter.hpp b/src/ig_simulator/base_repertoire/nucleotides_remover/config_based_getter.hpp
new file mode 100644
index 00000000..5334ac31
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/nucleotides_remover/config_based_getter.hpp
@@ -0,0 +1,14 @@
+//
+// Created by Andrew Bzikadze on 3/31/17.
+//
+
+#pragma once
+
+#include "abstract_nucleotides_remover.hpp"
+#include "ig_simulator_config.hpp"
+
+namespace ig_simulator {
+
+AbstractNucleotidesRemoverCPtr get_nucleotides_remover(const NucleotidesRemoverParams & config);
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/nucleotides_remover/uniform_nucleotides_remover.cpp b/src/ig_simulator/base_repertoire/nucleotides_remover/uniform_nucleotides_remover.cpp
new file mode 100644
index 00000000..ea689c41
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/nucleotides_remover/uniform_nucleotides_remover.cpp
@@ -0,0 +1,15 @@
+//
+// Created by Andrew Bzikadze on 3/17/17.
+//
+
+#include "uniform_nucleotides_remover.hpp"
+#include "simulation_routines.hpp"
+
+namespace ig_simulator {
+
+size_t UniformNucleotidesRemover::RemoveInVGene() const { return random_index(0, max_remove_v_gene); }
+size_t UniformNucleotidesRemover::RemoveInDGeneLeft() const { return random_index(0, max_remove_d_gene_left); }
+size_t UniformNucleotidesRemover::RemoveInDGeneRight() const { return random_index(0, max_remove_d_gene_right); }
+size_t UniformNucleotidesRemover::RemoveInJGene() const { return random_index(0, max_remove_j_gene); }
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/nucleotides_remover/uniform_nucleotides_remover.hpp b/src/ig_simulator/base_repertoire/nucleotides_remover/uniform_nucleotides_remover.hpp
new file mode 100644
index 00000000..c7e6e4e8
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/nucleotides_remover/uniform_nucleotides_remover.hpp
@@ -0,0 +1,38 @@
+//
+// Created by Andrew Bzikadze on 3/17/17.
+//
+
+#pragma once
+
+#include
+#include "abstract_nucleotides_remover.hpp"
+#include "ig_simulator_config.hpp"
+
+namespace ig_simulator {
+
+class UniformNucleotidesRemover final : public AbstractNucleotidesRemover {
+private:
+ const size_t max_remove_v_gene;
+ const size_t max_remove_d_gene_left;
+ const size_t max_remove_d_gene_right;
+ const size_t max_remove_j_gene;
+
+public:
+ explicit UniformNucleotidesRemover(
+ const NucleotidesRemoverParams::UniformRemoverParams config) :
+ AbstractNucleotidesRemover(),
+ max_remove_v_gene(config.max_remove_v_gene),
+ max_remove_d_gene_left(config.max_remove_d_gene_left),
+ max_remove_d_gene_right(config.max_remove_d_gene_right),
+ max_remove_j_gene(config.max_remove_j_gene)
+ { }
+
+ virtual size_t RemoveInVGene() const override;
+ virtual size_t RemoveInDGeneLeft() const override;
+ virtual size_t RemoveInDGeneRight() const override;
+ virtual size_t RemoveInJGene() const override;
+
+ virtual ~UniformNucleotidesRemover() { }
+};
+
+} // End namespace ig_simulator
\ No newline at end of file
diff --git a/src/ig_simulator/base_repertoire/p_nucleotides_creator/abstract_nucleotides_creator.cpp b/src/ig_simulator/base_repertoire/p_nucleotides_creator/abstract_nucleotides_creator.cpp
new file mode 100644
index 00000000..a979c5b4
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/p_nucleotides_creator/abstract_nucleotides_creator.cpp
@@ -0,0 +1,6 @@
+//
+// Created by Andrew Bzikadze on 3/20/17.
+//
+
+#include "abstract_nucleotides_creator.hpp"
+
diff --git a/src/ig_simulator/base_repertoire/p_nucleotides_creator/abstract_nucleotides_creator.hpp b/src/ig_simulator/base_repertoire/p_nucleotides_creator/abstract_nucleotides_creator.hpp
new file mode 100644
index 00000000..386ae038
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/p_nucleotides_creator/abstract_nucleotides_creator.hpp
@@ -0,0 +1,30 @@
+//
+// Created by Andrew Bzikadze on 3/20/17.
+//
+
+#pragma once
+
+#include
+#include
+
+namespace ig_simulator {
+
+class AbstractPNucleotidesCreator {
+public:
+ virtual size_t CreateInVGene() const = 0;
+ virtual size_t CreateInDGeneLeft() const = 0;
+ virtual size_t CreateInDGeneRight() const = 0;
+ virtual size_t CreateInJGene() const = 0;
+
+ AbstractPNucleotidesCreator() = default;
+ AbstractPNucleotidesCreator(const AbstractPNucleotidesCreator&) = delete;
+ AbstractPNucleotidesCreator(AbstractPNucleotidesCreator&&) = delete;
+ AbstractPNucleotidesCreator& operator=(const AbstractPNucleotidesCreator&) = delete;
+ AbstractPNucleotidesCreator& operator=(AbstractPNucleotidesCreator&&) = delete;
+
+ virtual ~AbstractPNucleotidesCreator() { }
+};
+
+using AbstractPNucleotidesCreatorCPtr = std::unique_ptr;
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/p_nucleotides_creator/config_based_getter.cpp b/src/ig_simulator/base_repertoire/p_nucleotides_creator/config_based_getter.cpp
new file mode 100644
index 00000000..04938e45
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/p_nucleotides_creator/config_based_getter.cpp
@@ -0,0 +1,19 @@
+//
+// Created by Andrew Bzikadze on 3/31/17.
+//
+
+#include "config_based_getter.hpp"
+#include "uniform_nucleotides_creator.hpp"
+
+
+namespace ig_simulator {
+
+AbstractPNucleotidesCreatorCPtr get_nucleotides_creator(const PNucleotidesCreatorParams &config) {
+    if (config.method == PNucleotidesCreatorMethod::Uniform)  // the only method handled by this factory
+        return AbstractPNucleotidesCreatorCPtr(new UniformPNucleotidesCreator(config.uniform_creator_params));
+    VERIFY(false);   // any other method is rejected
+    return nullptr;  // all paths must return: flowing off the end is UB should VERIFY not abort
+}
+
+} // End namespace ig_simulator
+
diff --git a/src/ig_simulator/base_repertoire/p_nucleotides_creator/config_based_getter.hpp b/src/ig_simulator/base_repertoire/p_nucleotides_creator/config_based_getter.hpp
new file mode 100644
index 00000000..4115dbc7
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/p_nucleotides_creator/config_based_getter.hpp
@@ -0,0 +1,14 @@
+//
+// Created by Andrew Bzikadze on 3/31/17.
+//
+
+#pragma once
+
+#include "abstract_nucleotides_creator.hpp"
+#include "ig_simulator_config.hpp"
+
+namespace ig_simulator {
+
+AbstractPNucleotidesCreatorCPtr get_nucleotides_creator(const PNucleotidesCreatorParams &config);
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/p_nucleotides_creator/uniform_nucleotides_creator.cpp b/src/ig_simulator/base_repertoire/p_nucleotides_creator/uniform_nucleotides_creator.cpp
new file mode 100644
index 00000000..e60eb0b4
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/p_nucleotides_creator/uniform_nucleotides_creator.cpp
@@ -0,0 +1,15 @@
+//
+// Created by Andrew Bzikadze on 3/20/17.
+//
+
+#include "uniform_nucleotides_creator.hpp"
+#include "simulation_routines.hpp"
+
+namespace ig_simulator {
+
+size_t UniformPNucleotidesCreator::CreateInVGene() const { return random_index(0, max_create_v_gene); }
+size_t UniformPNucleotidesCreator::CreateInDGeneLeft() const { return random_index(0, max_create_d_gene_left); }
+size_t UniformPNucleotidesCreator::CreateInDGeneRight() const { return random_index(0, max_create_d_gene_right); }
+size_t UniformPNucleotidesCreator::CreateInJGene() const { return random_index(0, max_create_j_gene); }
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/p_nucleotides_creator/uniform_nucleotides_creator.hpp b/src/ig_simulator/base_repertoire/p_nucleotides_creator/uniform_nucleotides_creator.hpp
new file mode 100644
index 00000000..ac84fa1f
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/p_nucleotides_creator/uniform_nucleotides_creator.hpp
@@ -0,0 +1,34 @@
+//
+// Created by Andrew Bzikadze on 3/20/17.
+//
+
+#pragma once
+
+#include "abstract_nucleotides_creator.hpp"
+#include "ig_simulator_config.hpp"
+
+namespace ig_simulator {
+
+class UniformPNucleotidesCreator final : public AbstractPNucleotidesCreator {
+private:
+ size_t max_create_v_gene;
+ size_t max_create_d_gene_left;
+ size_t max_create_d_gene_right;
+ size_t max_create_j_gene;
+
+public:
+ explicit UniformPNucleotidesCreator(
+ const PNucleotidesCreatorParams::UniformCreatorParams config) :
+ max_create_v_gene(config.max_create_v_gene),
+ max_create_d_gene_left(config.max_create_d_gene_left),
+ max_create_d_gene_right(config.max_create_d_gene_right),
+ max_create_j_gene(config.max_create_j_gene)
+ { }
+
+ virtual size_t CreateInVGene() const override;
+ virtual size_t CreateInDGeneLeft() const override;
+ virtual size_t CreateInDGeneRight() const override;
+ virtual size_t CreateInJGene() const override;
+};
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/productivity_checker/productivity_checker.cpp b/src/ig_simulator/base_repertoire/productivity_checker/productivity_checker.cpp
new file mode 100644
index 00000000..acd521cd
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/productivity_checker/productivity_checker.cpp
@@ -0,0 +1,17 @@
+//
+// Created by Andrew Bzikadze on 3/27/17.
+//
+
+#include "productivity_checker.hpp"
+
+namespace ig_simulator {
+
+bool ProductivityChecker::IsProductive(const AbstractMetaroot& root) const {
+ if (root.CDRLabeling().Empty())
+ return false;
+ core::Read read("", root.Sequence(), 0);
+ auto aa = aa_calculator->ComputeAminoAcidAnnotation(read, root.CDRLabeling());
+ return not aa.HasStopCodon() and aa.InFrame();
+}
+
+} // End namespace ig_simulator
\ No newline at end of file
diff --git a/src/ig_simulator/base_repertoire/productivity_checker/productivity_checker.hpp b/src/ig_simulator/base_repertoire/productivity_checker/productivity_checker.hpp
new file mode 100644
index 00000000..10d997a3
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/productivity_checker/productivity_checker.hpp
@@ -0,0 +1,34 @@
+//
+// Created by Andrew Bzikadze on 3/27/17.
+//
+
+#pragma once
+
+#include "base_repertoire/metaroot/metaroot.hpp"
+#include "annotation_utils/aa_annotation/aa_calculator.hpp"
+
+namespace ig_simulator {
+
+class ProductivityChecker {
+private:
+ const annotation_utils::BaseAACalculatorPtr aa_calculator;
+
+public:
+ explicit ProductivityChecker(annotation_utils::BaseAACalculatorPtr aa_calculator =
+ annotation_utils::BaseAACalculatorPtr(new annotation_utils::SimpleAACalculator())):
+ aa_calculator(std::move(aa_calculator))
+ { }
+
+ bool IsProductive(const AbstractMetaroot& root) const;
+
+ bool IsProductive(const AbstractMetarootCPtr& root) const {
+ return IsProductive(*check_pointer(root));
+ }
+
+ ProductivityChecker(const ProductivityChecker&) = delete;
+ ProductivityChecker(ProductivityChecker&&) = delete;
+ ProductivityChecker& operator=(const ProductivityChecker&) = delete;
+ ProductivityChecker& operator=(ProductivityChecker&&) = delete;
+};
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/fast_stop_codon_checker/fast_stop_codon_checker.cpp b/src/ig_simulator/clonal_trees/fast_stop_codon_checker/fast_stop_codon_checker.cpp
new file mode 100644
index 00000000..7b06dedf
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/fast_stop_codon_checker/fast_stop_codon_checker.cpp
@@ -0,0 +1,11 @@
+//
+// Created by Andrew Bzikadze on 4/25/17.
+//
+
+#include "fast_stop_codon_checker.hpp"
+
+namespace ig_simulator {
+
+constexpr std::array FastStopCodonChecker::stop_codons_hashes;
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/fast_stop_codon_checker/fast_stop_codon_checker.hpp b/src/ig_simulator/clonal_trees/fast_stop_codon_checker/fast_stop_codon_checker.hpp
new file mode 100644
index 00000000..68bdefac
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/fast_stop_codon_checker/fast_stop_codon_checker.hpp
@@ -0,0 +1,81 @@
+//
+// Created by Andrew Bzikadze on 4/25/17.
+//
+
+#pragma once
+
+#include
+#include
+#include
+#include
+
+namespace ig_simulator {
+
+class FastStopCodonCheckerDetails {
+friend class FastStopCodonChecker;
+private:
+ constexpr static unsigned get_hash(const char *s,
+ const unsigned hash_base,
+ const unsigned hash_base_sq) {
+ return s[0] + hash_base * s[1] + hash_base_sq * s[2];
+ }
+
+ static unsigned get_hash(std::string &&s,
+ const unsigned hash_base,
+ const unsigned hash_base_sq) {
+ return get_hash(s.c_str(), hash_base, hash_base_sq);
+ }
+};
+
+class FastStopCodonChecker { // Static-only helper: scans a nucleotide string for the codons TAG, TAA and TGA.
+private:
+    constexpr const static unsigned hash_base { 10 };
+    constexpr const static unsigned hash_base_sq { hash_base * hash_base };
+
+    /**
+     * I have to use the following hack and refrain from using std::array for stop_codons because
+     * on OSX El Capitan neither std::array::operator[] nor std::get for std::array
+     * is declared constexpr.
+     * The following code should be used instead in the future:
+     * @code
+     * constexpr const static std::array stop_codons { "TAG", "TAA", "TGA" };
+     * constexpr const static std::array stop_codons_hashes
+     * {{
+     *      FastStopCodonCheckerDetails::get_hash(std::get<0>(stop_codons), hash_base, hash_base_sq),
+     *      FastStopCodonCheckerDetails::get_hash(std::get<1>(stop_codons), hash_base, hash_base_sq),
+     *      FastStopCodonCheckerDetails::get_hash(std::get<2>(stop_codons), hash_base, hash_base_sq)
+     * }};
+     */
+    constexpr const static std::array stop_codons_hashes
+    {{
+        FastStopCodonCheckerDetails::get_hash("TAG", hash_base, hash_base_sq),
+        FastStopCodonCheckerDetails::get_hash("TAA", hash_base, hash_base_sq),
+        FastStopCodonCheckerDetails::get_hash("TGA", hash_base, hash_base_sq)
+    }};
+
+public:
+    bool static HasStopCodon(const std::string& str, size_t orf) { // true if a complete codon at orf, orf+3, ... is a stop codon
+        // Hash every complete codon in place; no temporary substring is built per codon.
+        const char* const data = str.c_str();
+        for (size_t i = orf; i + 2 < str.length(); i += 3) {
+            const unsigned hash = FastStopCodonCheckerDetails::get_hash(data + i, hash_base, hash_base_sq);
+            if (std::find(stop_codons_hashes.begin(), stop_codons_hashes.end(), hash)
+                != stop_codons_hashes.end()) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    bool static HasStopCodon(const std::string& str, const annotation_utils::CDRLabeling& labeling) {
+        return HasStopCodon(str, labeling.cdr1.start_pos % 3); // scan starts at cdr1.start_pos modulo 3
+    }
+
+    FastStopCodonChecker() = delete;
+    FastStopCodonChecker(const FastStopCodonChecker&) = delete;
+    FastStopCodonChecker(FastStopCodonChecker&&) = delete;
+    FastStopCodonChecker& operator=(const FastStopCodonChecker&) = delete;
+    FastStopCodonChecker& operator=(FastStopCodonChecker&&) = delete;
+};
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/forest/forest.cpp b/src/ig_simulator/clonal_trees/forest/forest.cpp
new file mode 100644
index 00000000..1bbb1fc2
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/forest/forest.cpp
@@ -0,0 +1,21 @@
+//
+// Created by Andrew Bzikadze on 4/9/17.
+//
+
+#include "forest.hpp"
+
+namespace ig_simulator {
+
+std::ostream& operator<<(std::ostream& out, const Forest& forest) {
+ for(size_t i = 0; i < forest.trees.size(); ++i) {
+ const auto& tree = forest.trees[i];
+ out << "===============================================\n";
+ out << "Tree # " << i + 1 << " / " << forest.trees.size() << '\n';
+ out << "===============================================\n";
+ out << tree;
+ out << '\n';
+ }
+ return out;
+}
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/forest/forest.hpp b/src/ig_simulator/clonal_trees/forest/forest.hpp
new file mode 100644
index 00000000..8636fbbe
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/forest/forest.hpp
@@ -0,0 +1,42 @@
+//
+// Created by Andrew Bzikadze on 4/9/17.
+//
+
+#pragma once
+
+#include "clonal_trees/tree/tree.hpp"
+#include "base_repertoire/metaroot_cluster/metaroot_cluster.hpp"
+
+namespace ig_simulator {
+
+class Forest { // A set of trees together with a non-owning pointer to their MetarootCluster.
+private:
+    const MetarootCluster* metaroot_cluster;
+    const std::vector trees;
+
+public:
+    Forest(const MetarootCluster* const metaroot_cluster,
+           std::vector&& trees = {}) noexcept:
+        metaroot_cluster(metaroot_cluster),
+        trees(std::move(trees)) // a named rvalue reference is an lvalue: without std::move this copied the vector
+    { }
+
+    Forest(const Forest&) = default;
+    Forest(Forest&&) = default;
+
+    Forest& operator=(const Forest&) = default; // NOTE(review): `trees` is const, so both defaulted
+    Forest& operator=(Forest&&) = default;      // assignments end up deleted; confirm that is intended
+
+    const MetarootCluster* GetMetarootCluster() const { return metaroot_cluster; }
+    const std::vector& Trees() const { return trees; }
+
+    size_t Size() const { return trees.size(); } // number of stored trees
+
+    friend std::ostream& operator<<(std::ostream&, const Forest&);
+};
+
+std::ostream& operator<<(std::ostream& out, const Forest&);
+
+using ForestStorage = std::vector;
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/tree/node.cpp b/src/ig_simulator/clonal_trees/tree/node.cpp
new file mode 100644
index 00000000..9ee7d08f
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree/node.cpp
@@ -0,0 +1,5 @@
+//
+// Created by Andrew Bzikadze on 4/9/17.
+//
+
+#include "node.hpp"
diff --git a/src/ig_simulator/clonal_trees/tree/node.hpp b/src/ig_simulator/clonal_trees/tree/node.hpp
new file mode 100644
index 00000000..03d4e81c
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree/node.hpp
@@ -0,0 +1,55 @@
+//
+// Created by Andrew Bzikadze on 4/9/17.
+//
+
+#pragma once
+
+#include
+#include
+#include
+#include
+
+#include "seqan/basic.h"
+
+
+namespace ig_simulator {
+
+class Node {
+public:
+ using SHM_Vector = std::vector>;
+
+private:
+ const size_t parent_ind;
+
+ // We store only SHMs "on the edge" from the parent
+ const SHM_Vector shms;
+ bool included;
+ bool productive;
+
+public:
+ Node(size_t parent_ind = size_t(-1),
+ SHM_Vector&& shms = {},
+ bool included = true,
+ bool productive = true):
+ parent_ind(parent_ind),
+ shms(std::move(shms)),
+ included(included),
+ productive(productive)
+ { }
+
+ Node(const Node&) = default;
+ Node(Node&&) = default;
+ Node& operator=(const Node&) = default;
+ Node& operator=(Node&&) = default;
+
+ size_t ParentInd() const { return parent_ind; }
+ const SHM_Vector& SHMs() const { return shms; }
+
+ void Exclude() { included = false; }
+ void MakeNonProductive() { productive = false; }
+
+ bool IsIncluded() const { return included; }
+ bool IsProductive() const { return productive; }
+};
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/tree/tree.cpp b/src/ig_simulator/clonal_trees/tree/tree.cpp
new file mode 100644
index 00000000..fe3310b5
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree/tree.cpp
@@ -0,0 +1,46 @@
+//
+// Created by Andrew Bzikadze on 4/9/17.
+//
+
+#include "tree.hpp"
+#include "verify.hpp"
+#include "annotation_utils/aa_annotation/aa_calculator.hpp"
+
+namespace ig_simulator {
+
+std::ostream& operator<<(std::ostream& out, const Tree& tree) {
+    // Emits the tree as a Graphviz digraph; per-vertex and per-edge details go into trailing comments.
+    VERIFY(tree.nodes.size() >= 1);
+
+    // Writes one vertex statement: the shape encodes productivity, the fill colour encodes inclusion.
+    const auto write_vertex = [&out](size_t index, bool circle_shape, const Node& vertex) {
+        const bool kept = vertex.IsIncluded();
+        out << '\t' << index << " [shape = " << (circle_shape ? "circle" : "box") << ","
+            << "fillcolor = " << (kept ? "cyan" : "magenta") << ","
+            << "style = filled,size=1]; // "
+            << '(' << (kept ? "included" : "excluded") << ')' << ' '
+            << '(' << (vertex.IsProductive() ? "productive" : "non-productive") << ')' << '\n';
+    };
+
+    out << "digraph G {\n";
+    // The root's shape follows the metaroot's productivity flag; its comment uses the node's own flag.
+    write_vertex(0, tree.Metaroot()->IsProductive(), tree.nodes.front());
+
+    for (size_t child = 1; child < tree.nodes.size(); ++child) {
+        const Node& vertex = tree.nodes[child];
+        const Node::SHM_Vector& edge_shms = vertex.SHMs();
+        write_vertex(child, vertex.IsProductive(), vertex);
+        out << '\t' << vertex.ParentInd() << " -> " << child
+            << "[minlen = " << std::to_string(edge_shms.size()) << "]; // "
+            << "total shms: " << edge_shms.size() << " ";
+        for (const auto& shm : edge_shms) {
+            out << "(at " << std::get<0>(shm) << " from " << std::get<1>(shm)
+                << " to " << std::get<2>(shm) << ')' << ' ';
+        }
+        out << '\n';
+    }
+    out << "}\n";
+    return out;
+}
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/tree/tree.hpp b/src/ig_simulator/clonal_trees/tree/tree.hpp
new file mode 100644
index 00000000..491c1f3f
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree/tree.hpp
@@ -0,0 +1,46 @@
+//
+// Created by Andrew Bzikadze on 4/9/17.
+//
+
+#pragma once
+
+#include "node.hpp"
+#include "base_repertoire/metaroot/metaroot.hpp"
+
+namespace ig_simulator {
+
+class Tree {  // A clonal tree: the metaroot it grew from, its nodes, and the nodes' sequences.
+    const AbstractMetaroot* metaroot;  // non-owning; only the pointee is const
+    std::vector nodes;      // non-const so Tree(Tree&&) really moves and operator= is not implicitly deleted
+    std::vector sequences;  // sequences[i] is read together with nodes[i] by the exporters
+
+public:
+    Tree(const AbstractMetaroot* const metaroot,
+         std::vector&& nodes = {},
+         std::vector&& sequences = {}) noexcept:
+        metaroot(metaroot),
+        nodes(std::move(nodes)),
+        sequences(std::move(sequences))
+    { }
+
+    Tree(const Tree&) = default;
+    Tree(Tree&&) = default;
+
+    Tree& operator=(const Tree&) = default;
+    Tree& operator=(Tree&&) = default;
+
+    size_t Size() const { return nodes.size(); }
+    const AbstractMetaroot* Metaroot() const { return metaroot; }
+
+    const std::vector& Sequences() const { return sequences; }  // read-only access only
+
+    bool IsNodeIncluded(size_t node_ind) const {  // no bounds check: node_ind must be < Size()
+        return nodes[node_ind].IsIncluded();
+    }
+
+    friend std::ostream& operator<<(std::ostream& out, const Tree& tree);
+};
+
+std::ostream& operator<<(std::ostream& out, const Tree& tree);
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/tree_creator/cartesian_tree.cpp b/src/ig_simulator/clonal_trees/tree_creator/cartesian_tree.cpp
new file mode 100644
index 00000000..789c0aea
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/cartesian_tree.cpp
@@ -0,0 +1,5 @@
+//
+// Created by Andrew Bzikadze on 4/10/17.
+//
+
+#include "cartesian_tree.hpp"
diff --git a/src/ig_simulator/clonal_trees/tree_creator/cartesian_tree.hpp b/src/ig_simulator/clonal_trees/tree_creator/cartesian_tree.hpp
new file mode 100644
index 00000000..24b1d646
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/cartesian_tree.hpp
@@ -0,0 +1,249 @@
+//
+// Created by Andrew Bzikadze on 4/10/17.
+//
+
+#pragma once
+
+#include
+#include
+#include "verify.hpp"
+#include "simulation_routines.hpp"
+
+namespace ig_simulator {
+
+template
+class Treap {
+private:
+ struct TreapNode;
+ // TODO change to unique_ptr
+ // using TreapNodePtr = std::shared_ptr;
+ using TreapNodePtr = TreapNode*;
+
+ struct TreapNode {
+ // @field key -- index in Tree (unique)
+ // @field freq -- the "rational" probability in discrete distribution
+ // @field sum -- sum of freq in the subtree possessing @this as a root
+ // @field prior -- normally random priority for the heap
+ KeyType key;
+ FreqType freq, sum;
+ PriorType prior;
+
+ TreapNodePtr left, right; // owned children: the destructor below deletes both
+
+ TreapNode(KeyType key, FreqType freq,
+ PriorType prior,
+ TreapNodePtr left = nullptr, TreapNodePtr right = nullptr) :
+ key(key), freq(freq), sum(freq), // sum starts as freq only; callers run Upd() to fold in children
+ prior(prior),
+ left(left), right(right)
+ { }
+
+ ~TreapNode() { // recursive: deleting a node deletes its whole subtree
+ delete left;
+ delete right;
+ }
+
+ static FreqType Sum(const TreapNodePtr &t) { // null-safe subtree weight: 0 for an empty subtree
+ if (t != nullptr)
+ return t->sum;
+ return 0;
+ }
+
+ static void Upd(TreapNodePtr &t) { // recomputes t->sum from its children and own freq; no-op on nullptr
+ if (t != nullptr)
+ t->sum = Sum(t->left) + Sum(t->right) + t->freq;
+ }
+ };
+
+ TreapNodePtr root;
+ size_t treap_size;
+
+private:
+ static void Merge(TreapNodePtr *pt, TreapNodePtr &l, TreapNodePtr &r) { // joins treaps l and r, storing the new root in *pt
+ if (l == nullptr)
+ *pt = r;
+ else if (r == nullptr)
+ *pt = l;
+ else if (l->prior < r->prior) { // the smaller prior becomes the root
+ Merge(&l->right, l->right, r); // r is merged into l's right subtree
+ *pt = l;
+ } else {
+ Merge(&r->left, l, r->left); // l is merged into r's left subtree
+ *pt = r;
+ }
+ TreapNode::Upd(*pt); // refresh the subtree sum of the resulting root
+ // Check(*pt);
+ } // NOTE(review): presumably requires every key in l to be smaller than every key in r -- confirm at call sites
+
+ static void Split(TreapNodePtr t, KeyType key, TreapNodePtr *l, TreapNodePtr *r) { // splits t by key into *l and *r
+ if (t == nullptr) {
+ *l = nullptr;
+ *r = nullptr;
+ }
+ else if (t->key < key) { // t and its left subtree go to *l
+ Split(t->right, key, &t->right, r); // only the right subtree can still contain keys >= key
+ *l = t;
+ TreapNode::Upd(*l);
+ } else { // t->key >= key: t and its right subtree go to *r
+ Split(t->left, key, l, &t->left);
+ *r = t;
+ TreapNode::Upd(*r);
+ }
+ // Check(t);
+ }
+
+ static void Check(TreapNodePtr t) { // debug helper: verifies the sum invariant on every node of the subtree
+ if (t == nullptr)
+ return;
+ VERIFY(t->sum == TreapNode::Sum(t->left) + TreapNode::Sum(t->right) + t->freq);
+ Check(t->left);
+ Check(t->right);
+ }
+
+public:
+ Treap(): root(nullptr), treap_size(0) { } // starts empty
+ ~Treap() { delete root; } // frees all nodes via ~TreapNode; NOTE(review): copy operations are not deleted, so a copied Treap would delete root twice
+
+ void Insert(KeyType key, FreqType freq, PriorType prior = random_index()) { // adds a new key with weight freq; key must not be present
+ VERIFY(not Contains(key));
+ TreapNodePtr * pt = &root;
+ while (*pt and (*pt)->prior < prior) { // descend while existing nodes have a smaller prior than the new one
+ (*pt)->sum += freq; // every node passed on the way gains the new weight
+ if (key < (*pt)->key)
+ pt = &(*pt)->left;
+ else
+ pt = &(*pt)->right;
+ }
+ TreapNodePtr l, r;
+ Split(*pt, key, &l, &r); // the subtree found at *pt is split by key and becomes the new node's children
+ *pt = TreapNodePtr(new TreapNode(key, freq, prior, l, r));
+ TreapNode::Upd(*pt); // the constructor sets sum = freq only, so fold in both children here
+ treap_size++;
+ // Check();
+ }
+
+    void Erase(KeyType key, FreqType freq) {
+        // Checked up front: the descent below edits subtree sums and would dereference nullptr otherwise.
+        VERIFY(Contains(key));
+        TreapNodePtr * pt = &root;
+        while ((*pt)->key != key) {
+            (*pt)->sum -= freq;  // every ancestor loses the erased node's weight
+            pt = (key < (*pt)->key) ? &(*pt)->left : &(*pt)->right;
+        }
+        VERIFY_MSG((*pt)->freq == freq, (*pt)->freq << " " << freq);
+        TreapNodePtr p = nullptr;
+        Merge(&p, (*pt)->left, (*pt)->right);
+        // Detach the children first: ~TreapNode deletes whatever left/right still point to.
+        (*pt)->left = nullptr;
+        (*pt)->right = nullptr;
+        delete *pt;
+        // p is nullptr when the erased node was a leaf, so the parent's link is cleared as well.
+        *pt = p;
+        treap_size--;
+        // Check();
+    }
+
+ void Erase(KeyType key) { // convenience overload: looks up the stored freq, then erases
+ FreqType freq = GetFreq(key); // GetFreq VERIFYs that the key is present
+ Erase(key, freq);
+ }
+
+ KeyType FindBySum(FreqType sum) const { // NOTE(review): no nullptr check on t while descending -- confirm callers stay within range
+ TreapNodePtr t = root;
+ FreqType temp;
+ while((temp = TreapNode::Sum(t->right) + 1) != sum) { // stops where right-subtree weight + 1 equals sum
+ if (temp > sum)
+ t = t->right;
+ else {
+ t = t->left;
+ sum -= temp;
+ }
+ } // NOTE(review): uses "+ 1" rather than t->freq, so this presumably assumes unit frequencies -- verify before use
+ return t->key;
+ }
+
+    // Reports whether a node with the given key is stored in the treap.
+    //
+    // The lookup is an ordinary search-tree descent on the key ordering:
+    // larger stored key -> continue in the left child, otherwise in the right one.
+    // Priorities and frequencies play no role here.
+    bool Contains(KeyType key) const {
+        for (TreapNodePtr cur = root; cur != nullptr; ) {
+            if (cur->key == key) {
+                return true;
+            }
+            cur = (cur->key > key) ? cur->left : cur->right;
+        }
+        return false;
+    }
+
+ FreqType GetFreq(KeyType key) const { // returns the freq stored for key; the key must be present
+ VERIFY(Contains(key));
+ TreapNodePtr t = root;
+ while (t != nullptr) { // same key-ordered descent as Contains()
+ if (t->key == key) {
+ return t->freq;
+ }
+ if (t->key > key) {
+ t = t->left;
+ } else {
+ t = t->right;
+ }
+ }
+ VERIFY(false); // not expected to be reached after the Contains() check above; no return value follows
+ }
+
+ void SetFreq(KeyType key, FreqType old_freq, FreqType new_freq) { // replaces key's freq; NOTE(review): key presence is not checked here
+ TreapNodePtr t = root;
+ while(t->key != key) { // adjust the sum of every ancestor on the path to the key
+ // if FreqType is unsigned `new_freq - old_freq` is dangerous
+ // std::cout << t->sum << " " << old_freq << "\n";
+ VERIFY_MSG(t->sum >= old_freq,
+ std::string("t->sum = ") + std::to_string(t->sum) +
+ ", old_freq = " + std::to_string(old_freq));
+ t->sum = t->sum - old_freq + new_freq; // subtract first, then add, to avoid forming new_freq - old_freq
+ if (t->key > key)
+ t = t->left;
+ else
+ t = t->right;
+ }
+ t->freq = new_freq; // old_freq is only used for the ancestors; it is not compared with the stored freq
+ // t->sum = t->sum - old_freq + new_freq;
+ TreapNode::Upd(t); // the node's own sum is recomputed from its children and the new freq
+ }
+
+ std::pair LowerBound(FreqType sum) const { // maps a weight offset to a node; returns { key, freq } of that node
+ TreapNodePtr t = root;
+ FreqType sum_left, sum_right;
+
+ while(true) {
+ VERIFY_MSG(t != nullptr, std::string("Asked Sum: ") + std::to_string(sum) +
+ " Full Sum: " + std::to_string(Sum()));
+ sum_left = TreapNode::Sum(t->left);
+ sum_right = TreapNode::Sum(t->right);
+
+ if (sum_left + sum_right <= sum) // offset lies beyond both children: the current node is the answer
+ break;
+
+ if (sum_left > sum ) // offsets below sum_left belong to the left subtree
+ t = t->left;
+ else { // remaining offsets belong to the right subtree, rebased by sum_left
+ t = t->right;
+ sum -= sum_left;
+ }
+ }
+ return { t->key, t->freq }; // NOTE(review): the pool managers pass random_index(1, pool.Sum()); confirm that range matches the offsets expected here
+ }
+
+ FreqType Sum() const { // total freq over all stored nodes; 0 for an empty treap
+ return TreapNode::Sum(root);
+ }
+
+ size_t Size() const { return treap_size; } // node count, maintained by Insert() and Erase()
+
+ void Check() const { // debug helper: verifies the sum invariant for the whole treap
+ Check(root);
+ }
+};
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/tree_creator/exporters.cpp b/src/ig_simulator/clonal_trees/tree_creator/exporters.cpp
new file mode 100644
index 00000000..f158a9d4
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/exporters.cpp
@@ -0,0 +1,52 @@
+//
+// Created by Andrew Bzikadze on 4/14/17.
+//
+
+#include "exporters.hpp"
+
+namespace ig_simulator {
+
+void TreeExporter(const Tree& tree, size_t forest_ind, size_t tree_ind,
+                  std::ostream& full, std::ostream& included)
+{
+    // Bind by reference: Sequences() returns a const reference, so `auto` alone copied every sequence.
+    const auto& sequences = tree.Sequences();
+    for (size_t i = 0; i < sequences.size(); ++i) {
+        std::stringstream id_ss;
+        id_ss << ">forest_" << forest_ind << "_tree_" << tree_ind << "_antibody_" << i;
+        const std::string id { id_ss.str() };
+        full << id << '\n' << sequences[i] << '\n';          // every antibody goes to the full pool
+        if (not tree.IsNodeIncluded(i)) { continue; }        // excluded nodes are left out of the second stream
+        included << id << '\n' << sequences[i] << '\n';
+    }
+}
+
+void ForestExporter(const Forest& forest, size_t forest_ind, std::ostream& full, std::ostream& included) {
+    const auto& trees = forest.Trees();  // exported in storage order; the position becomes the tree index
+    for (size_t tree_ind = 0; tree_ind != trees.size(); ++tree_ind)
+        TreeExporter(trees[tree_ind], forest_ind, tree_ind, full, included);
+}
+
+void ForestStorageExporter(const ForestStorage& forest_storage, std::ostream& full, std::ostream& included) {
+    size_t forest_ind = 0;  // running position of the forest inside the storage
+    for (const auto& forest : forest_storage)
+        ForestExporter(forest, forest_ind++, full, included);
+}
+
+void EdgeListsExporters(const ForestStorage& forest_storage, const IgSimulatorConfig::IOParams::OutputParams& config) {
+    // Streams every tree into <output_dir>/<trees_dir>/forest_<i>_tree_<j>.dot.
+    const std::string trees_path = path::append_path(config.output_dir, config.trees_dir);
+    path::make_dir(trees_path);
+    for (size_t forest_ind = 0; forest_ind < forest_storage.size(); ++forest_ind) {
+        const Forest& forest = forest_storage[forest_ind];
+        for (size_t tree_ind = 0; tree_ind < forest.Size(); ++tree_ind) {
+            std::stringstream dot_name;
+            dot_name << "forest_" << forest_ind << "_tree_" << tree_ind << ".dot";
+            const std::string dot_path = path::append_path(trees_path, dot_name.str());
+            std::ofstream dot_file(dot_path);  // closed by its destructor at the end of the iteration
+            dot_file << forest.Trees()[tree_ind];
+        }
+    }
+}
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/tree_creator/exporters.hpp b/src/ig_simulator/clonal_trees/tree_creator/exporters.hpp
new file mode 100644
index 00000000..c6c5b3a2
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/exporters.hpp
@@ -0,0 +1,20 @@
+//
+// Created by Andrew Bzikadze on 4/14/17.
+//
+
+#pragma once
+
+#include
+#include
+#include
+#include "ig_simulator_config.hpp"
+
+namespace ig_simulator {
+
+void TreeExporter(const Tree& tree, size_t forest_ind, size_t tree_ind, std::ostream& full, std::ostream& included);
+void ForestExporter(const Forest& forest, size_t forest_ind, std::ostream& full, std::ostream& included);
+void ForestStorageExporter(const ForestStorage& forest_storage, std::ostream& full, std::ostream& included);
+
+void EdgeListsExporters(const ForestStorage& forest_storage, const IgSimulatorConfig::IOParams::OutputParams& config);
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/tree_creator/forest_creator.cpp b/src/ig_simulator/clonal_trees/tree_creator/forest_creator.cpp
new file mode 100644
index 00000000..3c94902d
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/forest_creator.cpp
@@ -0,0 +1,9 @@
+//
+// Created by Andrew Bzikadze on 4/14/17.
+//
+
+#include "forest_creator.hpp"
+
+namespace ig_simulator {
+
+} // End namespace ig_simulator
\ No newline at end of file
diff --git a/src/ig_simulator/clonal_trees/tree_creator/forest_creator.hpp b/src/ig_simulator/clonal_trees/tree_creator/forest_creator.hpp
new file mode 100644
index 00000000..6835f0f3
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/forest_creator.hpp
@@ -0,0 +1,40 @@
+//
+// Created by Andrew Bzikadze on 4/14/17.
+//
+
+#pragma once
+
+#include "tree_creator.hpp"
+#include "clonal_trees/forest/forest.hpp"
+#include "base_repertoire/metaroot_cluster/metaroot_cluster.hpp"
+
+namespace ig_simulator {
+
+class ForestCreator { // builds one Forest per metaroot cluster by generating one tree per unit of multiplicity
+private:
+ const TreeCreator tree_creator;
+
+public:
+ ForestCreator(const vj_finder::VJFinderConfig& vjf_config,
+ const ClonalTreeSimulatorParams& config):
+ tree_creator(vjf_config, config) // both configs are forwarded unchanged to TreeCreator
+ { }
+
+ ForestCreator(const ForestCreator&) = delete; // non-copyable and non-movable
+ ForestCreator(ForestCreator&&) = delete;
+ ForestCreator& operator=(const ForestCreator&) = delete;
+ ForestCreator& operator=(ForestCreator&&) = delete;
+
+ template
+ Forest GenerateForest(const MetarootCluster& root) const { // returns root.Multiplicity() trees grown from root's metaroot
+ std::vector trees;
+ trees.reserve(root.Multiplicity());
+ for(size_t i = 0; i < root.Multiplicity(); ++i) {
+ Tree tree { tree_creator.GenerateTree(root.MetarootPtr().get()) }; // the tree keeps a raw pointer to the metaroot
+ trees.emplace_back(std::move(tree));
+ }
+ return Forest(&root, std::move(trees)); // the Forest stores the cluster's address, so root must outlive it
+ }
+};
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/tree_creator/forest_storage_creator.cpp b/src/ig_simulator/clonal_trees/tree_creator/forest_storage_creator.cpp
new file mode 100644
index 00000000..9e506faf
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/forest_storage_creator.cpp
@@ -0,0 +1,5 @@
+//
+// Created by Andrew Bzikadze on 4/14/17.
+//
+
+#include "forest_storage_creator.hpp"
diff --git a/src/ig_simulator/clonal_trees/tree_creator/forest_storage_creator.hpp b/src/ig_simulator/clonal_trees/tree_creator/forest_storage_creator.hpp
new file mode 100644
index 00000000..3fa62550
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/forest_storage_creator.hpp
@@ -0,0 +1,37 @@
+//
+// Created by Andrew Bzikadze on 4/14/17.
+//
+
+#pragma once
+
+#include "forest_creator.hpp"
+#include "base_repertoire/base_repertoire.hpp"
+
+namespace ig_simulator {
+
+class ForestStorageCreator { // builds a ForestStorage with one Forest per cluster of a base repertoire
+private:
+ const ForestCreator forest_creator;
+
+public:
+ ForestStorageCreator(const vj_finder::VJFinderConfig& vjf_config,
+ const ClonalTreeSimulatorParams& config):
+ forest_creator(vjf_config, config) // both configs are forwarded unchanged to ForestCreator
+ { }
+
+ ForestStorageCreator(const ForestStorageCreator&) = delete; // non-copyable and non-movable
+ ForestStorageCreator(ForestStorageCreator&&) = delete;
+ ForestStorageCreator& operator=(const ForestStorageCreator&) = delete;
+ ForestStorageCreator& operator=(ForestStorageCreator&&) = delete;
+
+ template
+ ForestStorage GenerateForest(const BaseRepertoire& repertoire) const { // forests are stored in the repertoire's iteration order
+ ForestStorage storage;
+ for(const auto& cluster : repertoire) {
+ storage.emplace_back(forest_creator.GenerateForest(cluster)); // each Forest refers back to its cluster inside repertoire
+ }
+ return storage;
+ }
+};
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/tree_creator/pool_manager.cpp b/src/ig_simulator/clonal_trees/tree_creator/pool_manager.cpp
new file mode 100644
index 00000000..d0b67309
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/pool_manager.cpp
@@ -0,0 +1,70 @@
+//
+// Created by Andrew Bzikadze on 4/9/17.
+//
+
+#include
+#include "pool_manager.hpp"
+
+namespace ig_simulator {
+
+std::pair UniformPoolManager::GetIndex(size_t n_insert) { // picks a parent, adds n_insert children to the pool; returns { parent index, stays in pool }
+ size_t raw_index = random_index(1, pool.Sum());
+ size_t index, freq;
+ std::tie(index, freq) = pool.LowerBound(raw_index);
+ VERIFY(freq == 1); // this manager only ever inserts unit weights
+
+ for (size_t i = 0; i < n_insert; ++i) {
+ pool.Insert(max_index++, 1); // children receive the next consecutive indices
+ }
+
+ bool ret_to_pool = ret_to_pool_distr(MTSingleton::GetInstance());
+ if (not ret_to_pool) {
+ pool.Erase(index, freq); // the parent leaves the pool; otherwise it stays with unchanged weight
+ }
+ return { index, ret_to_pool };
+}
+
+std::pair WideTreePoolManager::GetIndex(size_t n_insert) { // picks a parent, adds n_insert children to the pool; returns { parent index, stays in pool }
+ size_t raw_index = random_index(1, pool.Sum());
+ size_t index, freq;
+ std::tie(index, freq) = pool.LowerBound(raw_index);
+
+ for (size_t i = 0; i < n_insert; ++i) {
+ pool.Insert(max_index++, 1); // children always start with weight 1
+ }
+
+ bool ret_to_pool = ret_to_pool_distr(MTSingleton::GetInstance());
+ if (ret_to_pool) {
+ pool.SetFreq(index, freq, freq + 1); // a returning parent gains one unit of weight
+ } else {
+ pool.Erase(index, freq);
+ }
+ return { index, ret_to_pool };
+}
+
+std::pair DeepTreePoolManager::GetIndex(size_t n_insert) { // picks a parent, adds n_insert children to the pool; returns { parent index, stays in pool }
+ size_t raw_index = random_index(1, pool.Sum());
+ size_t index, freq;
+ std::tie(index, freq) = pool.LowerBound(raw_index);
+
+ size_t new_freq = freq + 1;
+ if (freq < std::numeric_limits::max()) { // NOTE(review): the limit's type is not visible here -- confirm the intended threshold
+ new_freq += static_cast(static_cast(new_freq) * 0.5); // below the threshold: grow by a further 50%
+ } else {
+ new_freq += static_cast(sqrt(static_cast(new_freq))); // at the threshold: grow by the square root instead
+ }
+
+ for (size_t i = 0; i < n_insert; ++i) {
+ pool.Insert(max_index++, new_freq); // children inherit the increased weight
+ }
+
+ bool ret_to_pool = ret_to_pool_distr(MTSingleton::GetInstance());
+ if (ret_to_pool) {
+ pool.SetFreq(index, freq, new_freq); // a returning parent gets the same increased weight as its children
+ } else {
+ pool.Erase(index, freq);
+ }
+ return { index, ret_to_pool };
+}
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/tree_creator/pool_manager.hpp b/src/ig_simulator/clonal_trees/tree_creator/pool_manager.hpp
new file mode 100644
index 00000000..11a53bcf
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/pool_manager.hpp
@@ -0,0 +1,74 @@
+//
+// Created by Andrew Bzikadze on 4/9/17.
+//
+
+#pragma once
+
+#include
+#include "cartesian_tree.hpp"
+#include "simulation_routines.hpp"
+#include "ig_simulator_utils.hpp"
+
+namespace ig_simulator {
+
+class AbstractPoolManager {  // Weighted pool of node indices from which the next parent is drawn.
+protected:
+    Treap<> pool;
+    mutable std::bernoulli_distribution ret_to_pool_distr;  // decides whether a drawn parent stays in the pool
+    size_t max_index;                                       // next index to hand out on insertion
+
+public:
+    AbstractPoolManager(double ret_prob):
+        pool(),
+        ret_to_pool_distr(check_probability(ret_prob)),
+        max_index(1)
+    { pool.Insert(0, 1); }  // the pool starts with index 0 at weight 1
+
+    AbstractPoolManager(const AbstractPoolManager&) = delete;
+    AbstractPoolManager(AbstractPoolManager&&) = delete;
+    AbstractPoolManager& operator=(const AbstractPoolManager&) = delete;
+    AbstractPoolManager& operator=(AbstractPoolManager&&) = delete;
+
+    virtual ~AbstractPoolManager() { }  // polymorphic base: deleting a derived manager via a base pointer must be defined
+
+    size_t MaxIndex() const { return max_index; }
+    void Erase(size_t index) {  // removes an index whatever its current weight
+        VERIFY(index < max_index);
+        pool.Erase(index);
+    }
+
+    size_t Size() const { return pool.Size(); }
+    virtual std::pair GetIndex(size_t n_insert) = 0;
+};
+
+using AbstractPoolManagerCPtr = std::unique_ptr;
+
+
+class UniformPoolManager final : public AbstractPoolManager { // keeps every pool entry at weight 1
+public:
+ UniformPoolManager(double ret_prob):
+ AbstractPoolManager(ret_prob)
+ { }
+
+ std::pair GetIndex(size_t n_insert) override; // defined in pool_manager.cpp
+};
+
+class WideTreePoolManager final : public AbstractPoolManager { // a parent that returns to the pool gains one unit of weight
+public:
+ WideTreePoolManager(double ret_prob):
+ AbstractPoolManager(ret_prob)
+ { }
+
+ std::pair GetIndex(size_t n_insert) override; // defined in pool_manager.cpp
+};
+
+class DeepTreePoolManager final : public AbstractPoolManager { // children are inserted with a weight larger than their parent's
+public:
+ DeepTreePoolManager(double ret_prob):
+ AbstractPoolManager(ret_prob)
+ { }
+
+ std::pair GetIndex(size_t n_insert) override; // defined in pool_manager.cpp
+};
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/tree_creator/shm_creator.cpp b/src/ig_simulator/clonal_trees/tree_creator/shm_creator.cpp
new file mode 100644
index 00000000..32b6d0b4
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/shm_creator.cpp
@@ -0,0 +1,52 @@
+//
+// Created by Andrew Bzikadze on 4/12/17.
+//
+
+#include "shm_creator.hpp"
+#include "random_generator.hpp"
+
+namespace ig_simulator {
+
+Node::SHM_Vector PoissonShmCreator::GenerateSHM_Vector(const std::string& seq) const {
+    size_t length = seq.length();
+    VERIFY(length > fix_left + fix_right);  // otherwise the position range below underflows / is empty
+    std::uniform_int_distribution ind_distr(fix_left, length - 1 - fix_right);
+    // At least one SHM, capped by the number of distinct positions so the rejection loop terminates.
+    size_t mut_numb = std::min<size_t>(distribution(MTSingleton::GetInstance()) + 1,
+                                       length - fix_left - fix_right);
+    std::vector mut_inds;
+    mut_inds.reserve(mut_numb);
+    while(mut_inds.size() < mut_numb) {  // rejection sampling of pairwise distinct positions
+        size_t ind = ind_distr(MTSingleton::GetInstance());
+        if (std::find(mut_inds.begin(), mut_inds.end(), ind) == mut_inds.end()) {
+            mut_inds.emplace_back(ind);
+        }
+    }
+
+    std::uniform_int_distribution mut_distr(0, 2);
+    Node::SHM_Vector shm_vector;
+    shm_vector.reserve(mut_numb);
+    for(const auto& mut_ind : mut_inds) {
+        seqan::Dna5 old_nucl { seq[mut_ind] };
+        size_t ind_nucl_old = old_nucl.value;
+        size_t ind_nucl_new = mut_distr(MTSingleton::GetInstance());
+        seqan::Dna new_nucl { ind_nucl_new < ind_nucl_old ? ind_nucl_new : ((ind_nucl_new + 1) & 3) };  // skip the old value
+        VERIFY_MSG(old_nucl != 'N' ? old_nucl != new_nucl : true,
+                   std::string("Old nucl: ") << old_nucl << ", New nucl: " << new_nucl
+                   << " old nucl index: " << ind_nucl_old << " new nucl index: " << ind_nucl_new);
+        shm_vector.emplace_back(mut_ind, old_nucl, new_nucl);  // (position, old nucleotide, new nucleotide)
+    }
+    return shm_vector;
+}
+
+AbstractShmCreatorCPtr get_shm_creator(const vj_finder::VJFinderConfig& vjf_config, // factory: builds the SHM creator selected by config.method
+ const SHM_CreatorParams& config)
+{
+ using SHM_CreatorMethod = SHM_CreatorParams::SHM_CreatorMethod;
+ if (config.method == SHM_CreatorMethod::Poisson) { // Poisson is the only method handled here
+ return std::unique_ptr(new PoissonShmCreator(vjf_config, config.poisson_params));
+ }
+ VERIFY(false); // any other method value is treated as a failed check; nothing is returned after it
+}
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/tree_creator/shm_creator.hpp b/src/ig_simulator/clonal_trees/tree_creator/shm_creator.hpp
new file mode 100644
index 00000000..dbb1c550
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/shm_creator.hpp
@@ -0,0 +1,62 @@
+//
+// Created by Andrew Bzikadze on 4/12/17.
+//
+
+#pragma once
+
+#include
+#include "ig_simulator_utils.hpp"
+#include "clonal_trees/tree/node.hpp"
+#include
+#include "ig_simulator_config.hpp"
+
+namespace ig_simulator {
+
+class AbstractShmCreator { // interface for generators of SHM lists applied to a nucleotide sequence
+protected:
+ const size_t fix_left; // copied from the VJ finder's fix_crop_fill_params.fix_left
+ const size_t fix_right; // copied from the VJ finder's fix_crop_fill_params.fix_right
+
+public:
+ AbstractShmCreator() = delete; // must be built from a VJFinderConfig; non-copyable and non-movable
+ AbstractShmCreator(const AbstractShmCreator&) = delete;
+ AbstractShmCreator(AbstractShmCreator&&) = delete;
+ AbstractShmCreator& operator=(const AbstractShmCreator&) = delete;
+ AbstractShmCreator& operator=(AbstractShmCreator&&) = delete;
+
+ explicit AbstractShmCreator(const vj_finder::VJFinderConfig& config):
+ fix_left(config.algorithm_params.fix_crop_fill_params.fix_left),
+ fix_right(config.algorithm_params.fix_crop_fill_params.fix_right)
+ { }
+
+ virtual ~AbstractShmCreator() { }
+
+ virtual Node::SHM_Vector GenerateSHM_Vector(const std::string&) const = 0; // returns the SHMs to apply to the given sequence
+};
+
+using AbstractShmCreatorCPtr = std::unique_ptr;
+
+
+class PoissonShmCreator final : public AbstractShmCreator { // SHM creator whose mutation count is a Poisson draw plus one
+private:
+ mutable std::poisson_distribution distribution; // mutable because sampling happens inside a const method
+
+public:
+ PoissonShmCreator(const vj_finder::VJFinderConfig& vjf_config,
+ double lambda):
+ AbstractShmCreator(vjf_config),
+ distribution(check_numeric_positive(lambda)) // lambda passes through check_numeric_positive before use
+ { }
+
+ PoissonShmCreator(const vj_finder::VJFinderConfig& vjf_config,
+ const SHM_CreatorParams::PoissonCreatorParams& config):
+ PoissonShmCreator(vjf_config, config.lambda) // delegates to the constructor above
+ { }
+
+
+ Node::SHM_Vector GenerateSHM_Vector(const std::string&) const override; // defined in shm_creator.cpp
+};
+
+AbstractShmCreatorCPtr get_shm_creator(const vj_finder::VJFinderConfig&, const SHM_CreatorParams&);
+
+} // End namespace ig_simulator
\ No newline at end of file
diff --git a/src/ig_simulator/clonal_trees/tree_creator/tree_creator.cpp b/src/ig_simulator/clonal_trees/tree_creator/tree_creator.cpp
new file mode 100644
index 00000000..b54dd49b
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/tree_creator.cpp
@@ -0,0 +1,9 @@
+//
+// Created by Andrew Bzikadze on 4/9/17.
+//
+
+#include "tree_creator.hpp"
+
+namespace ig_simulator {
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/tree_creator/tree_creator.hpp b/src/ig_simulator/clonal_trees/tree_creator/tree_creator.hpp
new file mode 100644
index 00000000..e8506c79
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/tree_creator.hpp
@@ -0,0 +1,117 @@
+//
+// Created by Andrew Bzikadze on 4/9/17.
+//
+
+#pragma once
+
+#include "clonal_trees/tree/tree.hpp"
+#include "pool_manager.hpp"
+#include "base_repertoire/metaroot/metaroot.hpp"
+#include "shm_creator.hpp"
+#include "tree_size_generator.hpp"
+#include "clonal_trees/fast_stop_codon_checker/fast_stop_codon_checker.hpp"
+
+namespace ig_simulator {
+
+class TreeCreator { // grows one clonal Tree from a metaroot using an SHM creator, a size generator and a pool manager
+protected:
+ const AbstractShmCreatorCPtr shm_creator;
+ const AbstractTreeSizeGeneratorCPtr tree_size_generator;
+ const double ret_prob; // handed to every PoolManager constructed in GenerateTree
+
+ mutable std::geometric_distribution distr_n_children; // mutable because sampling happens inside const methods
+
+private:
+ std::string CreateSequence(const std::string& base_seq, const Node::SHM_Vector& shms) const { // returns base_seq with all shms applied
+ std::string seq = base_seq;
+
+ for(const auto& shm : shms) { // shm = (position, expected old nucleotide, new nucleotide)
+ VERIFY_MSG(seq[std::get<0>(shm)] == std::get<1>(shm),
+ std::string("real seq: ") << seq <<
+ ", position: " << std::get<0>(shm) <<
+ ", expected: " << std::get<1>(shm));
+ seq[std::get<0>(shm)] = std::get<2>(shm);
+ }
+ return seq;
+ }
+
+public:
+ TreeCreator(AbstractShmCreatorCPtr&& shm_creator,
+ AbstractTreeSizeGeneratorCPtr&& tree_size_generator,
+ double ret_prob,
+ double lambda_distr_n_children):
+ shm_creator(std::move(shm_creator)),
+ tree_size_generator(std::move(tree_size_generator)),
+ ret_prob(check_numeric_positive(ret_prob)), // NOTE(review): AbstractPoolManager later applies check_probability to the same value
+ distr_n_children(check_numeric_positive(lambda_distr_n_children))
+ { }
+
+ TreeCreator(const vj_finder::VJFinderConfig& vjf_config,
+ const ClonalTreeSimulatorParams& config):
+ TreeCreator(get_shm_creator(vjf_config, config.shm_creator_params), // delegates to the constructor above
+ get_tree_size_generator(config.tree_size_generator_params),
+ config.prob_ret_to_pool,
+ config.lambda_distr_n_children)
+ { }
+
+ TreeCreator(const TreeCreator&) = delete; // non-copyable and non-movable
+ TreeCreator(TreeCreator&&) = delete;
+ TreeCreator& operator=(const TreeCreator&) = delete;
+ TreeCreator& operator=(TreeCreator&&) = delete;
+
+ template
+ Tree GenerateTree(const AbstractMetaroot* const root) const { // builds a tree rooted at root's sequence; root is stored as a raw pointer
+ static_assert(std::is_base_of::value,
+ "Pool Manager should be derived from @class AbstractPoolManager");
+
+ size_t tree_size = tree_size_generator->Generate(); // target node count
+ std::vector nodes;
+ nodes.reserve(tree_size);
+ nodes.emplace_back(); // node 0 is the root: default-constructed, parent index size_t(-1)
+
+ std::vector sequences;
+ sequences.reserve(tree_size); // reserved up front, so references into sequences stay valid while it grows up to tree_size
+ sequences.emplace_back(root->Sequence());
+
+ if (not root->IsProductive()) { // a non-productive metaroot yields a single-node tree
+ nodes.back().MakeNonProductive();
+ return Tree(root, std::move(nodes), std::move(sequences));
+ }
+
+ PoolManager pool_manager(ret_prob);
+
+ while(nodes.size() < tree_size) {
+ size_t n_children = distr_n_children(MTSingleton::GetInstance()) + 1; // at least one child per step
+ n_children = std::min(n_children, tree_size - nodes.size()); // never exceed the target size
+
+ size_t parent_ind;
+ bool stay;
+ std::tie(parent_ind, stay) = pool_manager.GetIndex(n_children); // also registers n_children new pool indices
+
+ if (not stay) {
+ nodes[parent_ind].Exclude(); // a parent that leaves the pool is marked as excluded
+ }
+
+ for (size_t i = 0; i < n_children; ++i) {
+ const std::string& base_sequence = sequences[parent_ind];
+ Node::SHM_Vector shm_vector { shm_creator->GenerateSHM_Vector(base_sequence)};
+ std::string sequence = CreateSequence(base_sequence, shm_vector);
+
+ nodes.emplace_back(parent_ind, std::move(shm_vector));
+ sequences.emplace_back(std::move(sequence));
+
+ if (FastStopCodonChecker::HasStopCodon(sequences.back(), root->CDRLabeling())) {
+ nodes.back().MakeNonProductive();
+ pool_manager.Erase(pool_manager.MaxIndex() - n_children + i); // pool index of the i-th child added in this step
+ }
+ }
+ if (pool_manager.Size() == 0) { break; } // All leaves are non-productive
+ }
+ if (pool_manager.Size() != 0) { // Only when all leaves are non-productive should the VERIFY be skipped.
+ VERIFY(nodes.size() == tree_size);
+ }
+ return Tree(root, std::move(nodes), std::move(sequences));
+ }
+};
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/tree_creator/tree_size_generator.cpp b/src/ig_simulator/clonal_trees/tree_creator/tree_size_generator.cpp
new file mode 100644
index 00000000..c8778217
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/tree_size_generator.cpp
@@ -0,0 +1,17 @@
+//
+// Created by Andrew Bzikadze on 4/11/17.
+//
+
+#include "tree_size_generator.hpp"
+
+namespace ig_simulator {
+
+AbstractTreeSizeGeneratorCPtr get_tree_size_generator(const TreeSizeGeneratorParams& config) { // factory: builds the generator selected by config.method
+ using Method = TreeSizeGeneratorParams::TreeSizeGeneratorMethod;
+ if (config.method == Method::Geometric) { // Geometric is the only method handled here
+ return AbstractTreeSizeGeneratorCPtr(new GeometricTreeSizeGenerator(config.geometric_params));
+ }
+ VERIFY(false); // any other method value is treated as a failed check; nothing is returned after it
+}
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/tree_creator/tree_size_generator.hpp b/src/ig_simulator/clonal_trees/tree_creator/tree_size_generator.hpp
new file mode 100644
index 00000000..95a54c7d
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/tree_size_generator.hpp
@@ -0,0 +1,52 @@
+//
+// Created by Andrew Bzikadze on 4/11/17.
+//
+
+#pragma once
+
+#include
+#include
+
+#include "ig_simulator_utils.hpp"
+#include "simulation_routines.hpp"
+#include "ig_simulator_config.hpp"
+
+namespace ig_simulator {
+
+class AbstractTreeSizeGenerator { // interface for generators of the target node count of a clonal tree
+public:
+ AbstractTreeSizeGenerator() = default;
+ AbstractTreeSizeGenerator(const AbstractTreeSizeGenerator&) = delete; // non-copyable and non-movable
+ AbstractTreeSizeGenerator(AbstractTreeSizeGenerator&&) = delete;
+ AbstractTreeSizeGenerator& operator=(const AbstractTreeSizeGenerator&) = delete;
+ AbstractTreeSizeGenerator& operator=(AbstractTreeSizeGenerator&&) = delete;
+
+ virtual size_t Generate() const = 0; // returns the size for the next tree
+
+ virtual ~AbstractTreeSizeGenerator() { }
+};
+
+using AbstractTreeSizeGeneratorCPtr = std::unique_ptr;
+
+class GeometricTreeSizeGenerator final : public AbstractTreeSizeGenerator { // tree size = geometric draw + 1
+private:
+ mutable std::geometric_distribution distribution; // mutable because sampling happens inside a const method
+
+public:
+ GeometricTreeSizeGenerator(double lambda):
+ AbstractTreeSizeGenerator(),
+ distribution(check_numeric_positive(lambda)) // lambda passes through check_numeric_positive before use
+ { }
+
+ GeometricTreeSizeGenerator(const TreeSizeGeneratorParams::GeometricParams& params):
+ GeometricTreeSizeGenerator(params.lambda) // delegates to the constructor above
+ { }
+
+ size_t Generate() const override {
+ return distribution(MTSingleton::GetInstance()) + 1; // the + 1 keeps the size at least 1
+ }
+};
+
+AbstractTreeSizeGeneratorCPtr get_tree_size_generator(const TreeSizeGeneratorParams& config);
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/ig_simulator_config.cpp b/src/ig_simulator/ig_simulator_config.cpp
new file mode 100644
index 00000000..f6a4bb31
--- /dev/null
+++ b/src/ig_simulator/ig_simulator_config.cpp
@@ -0,0 +1,319 @@
+//
+// Created by Andrew Bzikadze on 3/15/17.
+//
+
+#include "ig_simulator_config.hpp"
+#include
+#include
+
+namespace ig_simulator {
+
+// IOParams start
+// Fills input_params from pt: reads the "germline_input" and
+// "cdr_labeler_config_filename" entries. The trailing bool argument is unnamed and unused.
+void load(IgSimulatorConfig::IOParams::InputParams &input_params, boost::property_tree::ptree const &pt, bool) {
+ using config_common::load;
+ load(input_params.germline_input, pt, "germline_input");
+ load(input_params.cdr_labeler_config_filename, pt, "cdr_labeler_config_filename");
+}
+
+// Fills output_params from pt; each field is read from the entry with the same name.
+// The trailing bool argument is unnamed and unused.
+void load(IgSimulatorConfig::IOParams::OutputParams &output_params, boost::property_tree::ptree const &pt, bool) {
+ using config_common::load;
+ load(output_params.log_filename, pt, "log_filename");
+ load(output_params.output_dir, pt, "output_dir");
+ load(output_params.base_repertoire_filename, pt, "base_repertoire_filename");
+ load(output_params.base_repertoire_info, pt, "base_repertoire_info");
+ load(output_params.filtered_pool, pt, "filtered_pool");
+ load(output_params.full_pool, pt, "full_pool");
+ load(output_params.trees_dir, pt, "trees_dir");
+}
+
+// Fills io_params from pt: reads the "input_params" and "output_params" entries.
+// The trailing bool argument is unnamed and unused.
+void load(IgSimulatorConfig::IOParams &io_params, boost::property_tree::ptree const &pt, bool) {
+ using config_common::load;
+ load(io_params.input_params, pt, "input_params");
+ load(io_params.output_params, pt, "output_params");
+}
+// IOParams end
+
+// SimulationParams start
+// Fills custom_gene_chooser_params from pt. Each of the "v_genes_probs",
+// "d_genes_probs" and "j_genes_probs" entries is stored in the field of the same
+// name. (All three used to be written into v_genes_probs, leaving the D and J
+// fields empty and v_genes_probs holding the J value.)
+// The trailing bool argument is unnamed and unused.
+void load(GeneChooserParams::CustomGeneChooserParams& custom_gene_chooser_params,
+          boost::property_tree::ptree const &pt, bool)
+{
+    using config_common::load;
+    load(custom_gene_chooser_params.v_genes_probs, pt, "v_genes_probs");
+    load(custom_gene_chooser_params.d_genes_probs, pt, "d_genes_probs");
+    load(custom_gene_chooser_params.j_genes_probs, pt, "j_genes_probs");
+}
+
+
+// Selects the gene chooser method from the "gene_chooser_method" entry of pt.
+// The value is matched case-insensitively against "uniform" and "custom"; any other
+// value trips VERIFY(false). For "custom" the "custom_chooser_params" entry is
+// loaded too. The trailing bool argument is unnamed and unused.
+void load(IgSimulatorConfig::SimulationParams::BaseRepertoireParams::MetarootSimulationParams
+          ::GeneChooserParams &gene_chooser_params,
+          boost::property_tree::ptree const &pt, bool) {
+    using config_common::load;
+    using GeneChooserMethod =
+        IgSimulatorConfig::SimulationParams::BaseRepertoireParams::
+        MetarootSimulationParams::GeneChooserParams::GeneChooserMethod;
+    std::string method_str(pt.get<std::string>("gene_chooser_method"));
+    // Normalize in place. The conversion goes through unsigned char because passing
+    // a negative char value to ::tolower is undefined behaviour.
+    std::transform(method_str.begin(), method_str.end(), method_str.begin(),
+                   [](unsigned char c) { return static_cast<char>(::tolower(c)); });
+    // The comparison now uses the normalized string; previously the raw value was
+    // compared, so the lowercasing had no effect.
+    if (method_str == "uniform") {
+        gene_chooser_params.method = GeneChooserMethod::Uniform;
+    } else if (method_str == "custom") {
+        gene_chooser_params.method = GeneChooserMethod::Custom;
+        load(gene_chooser_params.custom_gene_chooser_params, pt, "custom_chooser_params");
+    } else {
+        VERIFY(false);
+    }
+}
+
+// Fills uniform_remover_params from pt; each max_remove_* field is read from the
+// entry with the same name. The trailing bool argument is unnamed and unused.
+void load(IgSimulatorConfig::SimulationParams::BaseRepertoireParams::MetarootSimulationParams
+ ::NucleotidesRemoverParams::UniformRemoverParams &uniform_remover_params,
+ boost::property_tree::ptree const &pt, bool) {
+ using config_common::load;
+ load(uniform_remover_params.max_remove_v_gene, pt, "max_remove_v_gene");
+ load(uniform_remover_params.max_remove_d_gene_left, pt, "max_remove_d_gene_left");
+ load(uniform_remover_params.max_remove_d_gene_right, pt, "max_remove_d_gene_right");
+ load(uniform_remover_params.max_remove_j_gene, pt, "max_remove_j_gene");
+}
+
+// Selects the nucleotides remover method from the "nucleotides_remover_method"
+// entry of pt (matched case-insensitively). Only "uniform" is accepted, in which
+// case "uniform_remover_params" is loaded; any other value trips VERIFY(false).
+// The trailing bool argument is unnamed and unused.
+void load(IgSimulatorConfig::SimulationParams::BaseRepertoireParams::MetarootSimulationParams
+          ::NucleotidesRemoverParams &nucleotides_remover_params,
+          boost::property_tree::ptree const &pt, bool) {
+    using config_common::load;
+    using NucleotidesRemoverMethod =
+        IgSimulatorConfig::SimulationParams::BaseRepertoireParams::
+        MetarootSimulationParams::NucleotidesRemoverParams::NucleotidesRemoverMethod;
+    std::string method_str(pt.get<std::string>("nucleotides_remover_method"));
+    // Normalize in place via unsigned char (negative char into ::tolower is undefined).
+    std::transform(method_str.begin(), method_str.end(), method_str.begin(),
+                   [](unsigned char c) { return static_cast<char>(::tolower(c)); });
+    // Compare the normalized value; the raw string was compared before.
+    if (method_str == "uniform") {
+        nucleotides_remover_params.method = NucleotidesRemoverMethod::Uniform;
+        load(nucleotides_remover_params.uniform_remover_params, pt, "uniform_remover_params");
+    } else {
+        VERIFY(false);
+    }
+}
+
+// Fills uniform_creator_params from pt; each max_create_* field is read from the
+// entry with the same name. The trailing bool argument is unnamed and unused.
+void load(IgSimulatorConfig::SimulationParams::BaseRepertoireParams::MetarootSimulationParams
+ ::PNucleotidesCreatorParams::UniformCreatorParams &uniform_creator_params,
+ boost::property_tree::ptree const &pt, bool) {
+ using config_common::load;
+ load(uniform_creator_params.max_create_v_gene, pt, "max_create_v_gene");
+ load(uniform_creator_params.max_create_d_gene_left, pt, "max_create_d_gene_left");
+ load(uniform_creator_params.max_create_d_gene_right, pt, "max_create_d_gene_right");
+ load(uniform_creator_params.max_create_j_gene, pt, "max_create_j_gene");
+}
+
+// Selects the P-nucleotides creator method from the "p_nucleotides_creator_method"
+// entry of pt (matched case-insensitively). Only "uniform" is accepted, in which
+// case "uniform_creator_params" is loaded; any other value trips VERIFY(false).
+// The trailing bool argument is unnamed and unused.
+void load(IgSimulatorConfig::SimulationParams::BaseRepertoireParams::MetarootSimulationParams
+          ::PNucleotidesCreatorParams &p_nucleptides_creator_params,
+          boost::property_tree::ptree const &pt, bool) {
+    using config_common::load;
+    // Local alias for the method enum (it used to be named like the params struct).
+    using PNucleotidesCreatorMethod =
+        IgSimulatorConfig::SimulationParams::BaseRepertoireParams::
+        MetarootSimulationParams::PNucleotidesCreatorParams::PNucleotidesCreatorMethod;
+    std::string method_str(pt.get<std::string>("p_nucleotides_creator_method"));
+    // Normalize in place via unsigned char (negative char into ::tolower is undefined).
+    std::transform(method_str.begin(), method_str.end(), method_str.begin(),
+                   [](unsigned char c) { return static_cast<char>(::tolower(c)); });
+    // Compare the normalized value; the raw string was compared before.
+    if (method_str == "uniform") {
+        p_nucleptides_creator_params.method = PNucleotidesCreatorMethod::Uniform;
+        load(p_nucleptides_creator_params.uniform_creator_params, pt, "uniform_creator_params");
+    } else {
+        VERIFY(false);
+    }
+}
+
+// Fills uniform_inserter_params from pt; each max_*_insertion field is read from the
+// entry with the same name. The trailing bool argument is unnamed and unused.
+void load(IgSimulatorConfig::SimulationParams::BaseRepertoireParams::MetarootSimulationParams
+ ::NNucleotidesInserterParams::UniformInserterParams &uniform_inserter_params,
+ boost::property_tree::ptree const &pt, bool) {
+ using config_common::load;
+ load(uniform_inserter_params.max_vj_insertion, pt, "max_vj_insertion");
+ load(uniform_inserter_params.max_vd_insertion, pt, "max_vd_insertion");
+ load(uniform_inserter_params.max_dj_insertion, pt, "max_dj_insertion");
+}
+
+// Selects the N-nucleotides inserter method from the "n_nucleotides_method" entry
+// of pt (matched case-insensitively). Only "uniform" is accepted, in which case
+// "uniform_inserter_params" is loaded; any other value trips VERIFY(false).
+// The trailing bool argument is unnamed and unused.
+void load(IgSimulatorConfig::SimulationParams::BaseRepertoireParams::MetarootSimulationParams
+          ::NNucleotidesInserterParams &n_nucleotides_inserter_params,
+          boost::property_tree::ptree const &pt, bool) {
+    using config_common::load;
+    // Local alias for the method enum (it used to be named like the params struct).
+    using NNucleotidesInserterMethod =
+        IgSimulatorConfig::SimulationParams::BaseRepertoireParams::
+        MetarootSimulationParams::NNucleotidesInserterParams::NNucleotidesInserterMethod;
+    std::string method_str(pt.get<std::string>("n_nucleotides_method"));
+    // Normalize in place via unsigned char (negative char into ::tolower is undefined).
+    std::transform(method_str.begin(), method_str.end(), method_str.begin(),
+                   [](unsigned char c) { return static_cast<char>(::tolower(c)); });
+    // Compare the normalized value; the raw string was compared before.
+    if (method_str == "uniform") {
+        n_nucleotides_inserter_params.method = NNucleotidesInserterMethod::Uniform;
+        load(n_nucleotides_inserter_params.uniform_inserter_params, pt, "uniform_inserter_params");
+    } else {
+        VERIFY(false);
+    }
+}
+
+// Fills cleavage_params from pt; each prob_cleavage_* field is read from the entry
+// with the same name. The trailing bool argument is unnamed and unused.
+void load(IgSimulatorConfig::SimulationParams::BaseRepertoireParams::MetarootSimulationParams
+ ::CleavageParams &cleavage_params,
+ boost::property_tree::ptree const &pt, bool) {
+ using config_common::load;
+ load(cleavage_params.prob_cleavage_v, pt, "prob_cleavage_v");
+ load(cleavage_params.prob_cleavage_d_left, pt, "prob_cleavage_d_left");
+ load(cleavage_params.prob_cleavage_d_right, pt, "prob_cleavage_d_right");
+ load(cleavage_params.prob_cleavage_j, pt, "prob_cleavage_j");
+}
+
+// Fills metaroot_simulation_params from pt by loading its five nested parameter
+// groups, each from the entry with the same name. cdr_labeler_config is not touched
+// here. The trailing bool argument is unnamed and unused.
+void load(IgSimulatorConfig::SimulationParams::BaseRepertoireParams
+ ::MetarootSimulationParams &metaroot_simulation_params,
+ boost::property_tree::ptree const &pt, bool)
+{
+ using config_common::load;
+ load(metaroot_simulation_params.gene_chooser_params, pt, "gene_chooser_params");
+ load(metaroot_simulation_params.nucleotides_remover_params, pt, "nucleotides_remover_params");
+ load(metaroot_simulation_params.p_nucleotides_creator_params, pt, "p_nucleotides_creator_params");
+ load(metaroot_simulation_params.n_nucleotides_inserter_params, pt, "n_nucleotides_inserter_params");
+ load(metaroot_simulation_params.cleavage_params, pt, "cleavage_params");
+}
+
+// Reads the "lambda" entry of pt into geometric_params.lambda.
+// The trailing bool argument is unnamed and unused.
+void load(MultiplicityCreatorParams::GeometricParams &geometric_params,
+ boost::property_tree::ptree const &pt, bool)
+{
+ using config_common::load;
+ load(geometric_params.lambda, pt, "lambda");
+}
+
+// Selects the multiplicity creator method from the "multiplicity_method" entry of
+// pt (matched case-insensitively). Only "geometric" is accepted, in which case
+// "geometric_params" is loaded; any other value trips VERIFY(false).
+// The trailing bool argument is unnamed and unused.
+void load(MultiplicityCreatorParams &multiplicity_creator_params,
+          boost::property_tree::ptree const &pt, bool)
+{
+    using config_common::load;
+    using MultiplicityCreatorMethod = MultiplicityCreatorParams::MultiplicityCreatorMethod;
+
+    std::string method_str(pt.get<std::string>("multiplicity_method"));
+    // Normalize in place via unsigned char (negative char into ::tolower is undefined).
+    std::transform(method_str.begin(), method_str.end(), method_str.begin(),
+                   [](unsigned char c) { return static_cast<char>(::tolower(c)); });
+    // Compare the normalized value; the raw string was compared before.
+    if (method_str == "geometric") {
+        multiplicity_creator_params.method = MultiplicityCreatorMethod::Geometric;
+        load(multiplicity_creator_params.geometric_params, pt, "geometric_params");
+    } else {
+        VERIFY(false);
+    }
+}
+
+// Reads the "productive_part" entry of pt into the given ProductiveParams.
+// Note that the parameter is named base_repertoire_params although its type is
+// ProductiveParams. The trailing bool argument is unnamed and unused.
+void load(ProductiveParams &base_repertoire_params,
+ boost::property_tree::ptree const &pt, bool)
+{
+ using config_common::load;
+ load(base_repertoire_params.productive_part, pt, "productive_part");
+}
+
+// Fills base_repertoire_params from pt: three nested parameter groups plus
+// number_of_metaroots, each read from the entry with the same name.
+// The trailing bool argument is unnamed and unused.
+void load(IgSimulatorConfig::SimulationParams::BaseRepertoireParams &base_repertoire_params,
+ boost::property_tree::ptree const &pt, bool)
+{
+ using config_common::load;
+ load(base_repertoire_params.metaroot_simulation_params, pt, "metaroot_simulation_params");
+ load(base_repertoire_params.multiplicity_creator_params, pt, "multiplicity_creator_params");
+ load(base_repertoire_params.productive_params, pt, "productive_params");
+ load(base_repertoire_params.number_of_metaroots, pt, "number_of_metaroots");
+}
+
+// Reads the "lambda" entry of pt into geometric_params.lambda.
+// The trailing bool argument is unnamed and unused.
+void load(TreeSizeGeneratorParams::GeometricParams &geometric_params,
+ boost::property_tree::ptree const &pt, bool)
+{
+ using config_common::load;
+ load(geometric_params.lambda, pt, "lambda");
+}
+
+// Selects the tree size generator method from the "tree_size_generator_method"
+// entry of pt (matched case-insensitively). Only "geometric" is accepted, in which
+// case "geometric_params" is loaded; any other value trips VERIFY(false).
+// The trailing bool argument is unnamed and unused.
+void load(TreeSizeGeneratorParams &tree_size_generator_params,
+          boost::property_tree::ptree const &pt, bool)
+{
+    using config_common::load;
+    using TreeSizeGeneratorMethod = TreeSizeGeneratorParams::TreeSizeGeneratorMethod;
+
+    std::string method_str(pt.get<std::string>("tree_size_generator_method"));
+    // Normalize in place via unsigned char (negative char into ::tolower is undefined).
+    std::transform(method_str.begin(), method_str.end(), method_str.begin(),
+                   [](unsigned char c) { return static_cast<char>(::tolower(c)); });
+    // Compare the normalized value; the raw string was compared before.
+    if (method_str == "geometric") {
+        tree_size_generator_params.method = TreeSizeGeneratorMethod::Geometric;
+        load(tree_size_generator_params.geometric_params, pt, "geometric_params");
+    } else {
+        VERIFY(false);
+    }
+}
+
+// Reads the "lambda" entry of pt into params.lambda.
+// The trailing bool argument is unnamed and unused.
+void load(SHM_CreatorParams::PoissonCreatorParams &params,
+          boost::property_tree::ptree const &pt, bool)
+{
+    using config_common::load;
+    load(params.lambda, pt, "lambda");
+}
+
+// Selects the SHM creator method from the "shm_creator_method" entry of pt
+// (matched case-insensitively). Only "poisson" is accepted, in which case
+// "poisson_params" is loaded; any other value trips VERIFY(false).
+// The trailing bool argument is unnamed and unused.
+void load(SHM_CreatorParams &shm_creator_params,
+          boost::property_tree::ptree const &pt, bool) {
+    using config_common::load;
+    using SHM_CreatorMethod = SHM_CreatorParams::SHM_CreatorMethod;
+
+    std::string method_str(pt.get<std::string>("shm_creator_method"));
+    // Normalize in place via unsigned char (negative char into ::tolower is undefined).
+    std::transform(method_str.begin(), method_str.end(), method_str.begin(),
+                   [](unsigned char c) { return static_cast<char>(::tolower(c)); });
+    // Compare the normalized value; the raw string was compared before.
+    if (method_str == "poisson") {
+        shm_creator_params.method = SHM_CreatorMethod::Poisson;
+        load(shm_creator_params.poisson_params, pt, "poisson_params");
+    } else {
+        VERIFY(false);
+    }
+}
+
+// Fills clonal_tree_simulator_params from pt.
+// The "pool_manager_strategy" entry is matched case-insensitively against
+// "uniform", "wide" and "deep"; any other value trips VERIFY(false). Afterwards
+// prob_ret_to_pool, lambda_distr_n_children, tree_size_generator_params and
+// shm_creator_params are read from the entries with the same names.
+// The trailing bool argument is unnamed and unused.
+void load(ClonalTreeSimulatorParams &clonal_tree_simulator_params,
+          boost::property_tree::ptree const &pt, bool)
+{
+    using config_common::load;
+    using PoolManagerStrategy = ClonalTreeSimulatorParams::PoolManagerStrategy;
+
+    std::string method_str(pt.get<std::string>("pool_manager_strategy"));
+    // Normalize in place via unsigned char (negative char into ::tolower is undefined).
+    std::transform(method_str.begin(), method_str.end(), method_str.begin(),
+                   [](unsigned char c) { return static_cast<char>(::tolower(c)); });
+    // Compare the normalized value; the raw string was compared before.
+    if (method_str == "uniform") {
+        clonal_tree_simulator_params.pool_manager_strategy = PoolManagerStrategy::UniformPoolManager;
+    } else if (method_str == "wide") {
+        clonal_tree_simulator_params.pool_manager_strategy = PoolManagerStrategy::WideTreePoolManager;
+    } else if (method_str == "deep") {
+        clonal_tree_simulator_params.pool_manager_strategy = PoolManagerStrategy::DeepTreePoolManager;
+    } else {
+        VERIFY(false);
+    }
+
+    load(clonal_tree_simulator_params.prob_ret_to_pool, pt, "prob_ret_to_pool");
+    load(clonal_tree_simulator_params.lambda_distr_n_children, pt, "lambda_distr_n_children");
+    load(clonal_tree_simulator_params.tree_size_generator_params, pt, "tree_size_generator_params");
+    load(clonal_tree_simulator_params.shm_creator_params, pt, "shm_creator_params");
+}
+
+// Fills simulation_params from pt: reads the "base_repertoire_params" and
+// "clonal_tree_simulator_params" entries. The trailing bool argument is unnamed and unused.
+void load(IgSimulatorConfig::SimulationParams &simulation_params,
+ boost::property_tree::ptree const &pt, bool)
+{
+ using config_common::load;
+ load(simulation_params.base_repertoire_params, pt, "base_repertoire_params");
+ load(simulation_params.clonal_tree_simulator_params, pt, "clonal_tree_simulator_params");
+}
+// SimulationParams end
+
+
+// Fills the whole configuration from pt.
+// "io_params" and "simulation_params" are loaded with the `complete` flag forwarded;
+// "germline_params" is loaded without it. Finally the CDR labeler configuration
+// nested inside the metaroot simulation parameters is loaded from the file named by
+// io_params.input_params.cdr_labeler_config_filename, so io_params must be read first.
+void load(IgSimulatorConfig &cfg, boost::property_tree::ptree const &pt, bool complete) {
+ using config_common::load;
+ load(cfg.io_params, pt, "io_params", complete);
+ load(cfg.simulation_params, pt, "simulation_params", complete);
+ load(cfg.germline_params, pt, "germline_params");
+ // TODO remove this hack
+ cfg.simulation_params.base_repertoire_params.
+ metaroot_simulation_params.cdr_labeler_config.load(cfg.io_params.input_params.cdr_labeler_config_filename);
+}
+
+// Parses `filename` with boost::property_tree::read_info and fills cfg from the
+// resulting tree, passing complete = true.
+void load(IgSimulatorConfig &cfg, std::string const &filename) {
+ boost::property_tree::ptree pt;
+ boost::property_tree::read_info(filename, pt);
+ load(cfg, pt, true);
+}
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/ig_simulator_config.hpp b/src/ig_simulator/ig_simulator_config.hpp
new file mode 100644
index 00000000..64a5d10a
--- /dev/null
+++ b/src/ig_simulator/ig_simulator_config.hpp
@@ -0,0 +1,193 @@
+//
+// Created by Andrew Bzikadze on 3/15/17.
+//
+
+#pragma once
+
+#include "io/library.hpp"
+#include
+#include "config_singl.hpp"
+#include "germline_utils/germline_config.hpp"
+#include "cdr_config.hpp"
+
+namespace ig_simulator {
+
+// Plain aggregate holding the ig_simulator configuration. The nesting of the structs
+// mirrors the nesting of the sections read by the load() overloads.
+// Note: no member has a default initializer, so values are unspecified until loaded.
+struct IgSimulatorConfig {
+ // Input and output locations.
+ struct IOParams {
+ struct InputParams {
+ germline_utils::GermlineInput germline_input;
+ std::string cdr_labeler_config_filename;
+ };
+
+ struct OutputParams {
+ std::string output_dir;
+ std::string log_filename;
+ std::string base_repertoire_filename;
+ std::string base_repertoire_info;
+ std::string filtered_pool;
+ std::string full_pool;
+ std::string trees_dir;
+ };
+
+ InputParams input_params;
+ OutputParams output_params;
+ };
+
+
+ // Parameters of the simulation itself: base repertoire and clonal trees.
+ struct SimulationParams {
+ struct BaseRepertoireParams {
+ struct MetarootSimulationParams {
+ struct GeneChooserParams {
+ // Only loaded when method is Custom.
+ struct CustomGeneChooserParams {
+ std::string v_genes_probs;
+ std::string d_genes_probs;
+ std::string j_genes_probs;
+ };
+
+ enum class GeneChooserMethod { Uniform, Custom };
+ GeneChooserMethod method;
+ CustomGeneChooserParams custom_gene_chooser_params;
+ };
+
+ struct NucleotidesRemoverParams {
+ enum class NucleotidesRemoverMethod { Uniform };
+ struct UniformRemoverParams {
+ size_t max_remove_v_gene;
+ size_t max_remove_d_gene_left;
+ size_t max_remove_d_gene_right;
+ size_t max_remove_j_gene;
+ };
+ NucleotidesRemoverMethod method;
+ UniformRemoverParams uniform_remover_params;
+ };
+
+ struct PNucleotidesCreatorParams {
+ enum class PNucleotidesCreatorMethod { Uniform };
+ struct UniformCreatorParams {
+ size_t max_create_v_gene;
+ size_t max_create_d_gene_left;
+ size_t max_create_d_gene_right;
+ size_t max_create_j_gene;
+ };
+ PNucleotidesCreatorMethod method;
+ UniformCreatorParams uniform_creator_params;
+ };
+
+ struct NNucleotidesInserterParams {
+ enum class NNucleotidesInserterMethod { Uniform };
+ struct UniformInserterParams {
+ size_t max_vj_insertion;
+ size_t max_vd_insertion;
+ size_t max_dj_insertion;
+ };
+ NNucleotidesInserterMethod method;
+ UniformInserterParams uniform_inserter_params;
+ };
+
+ struct CleavageParams {
+ double prob_cleavage_v;
+ double prob_cleavage_d_left;
+ double prob_cleavage_d_right;
+ double prob_cleavage_j;
+ };
+
+ GeneChooserParams gene_chooser_params;
+ NucleotidesRemoverParams nucleotides_remover_params;
+ PNucleotidesCreatorParams p_nucleotides_creator_params;
+ NNucleotidesInserterParams n_nucleotides_inserter_params;
+ CleavageParams cleavage_params;
+ // Filled separately from the file named by cdr_labeler_config_filename.
+ cdr_labeler::CDRLabelerConfig cdr_labeler_config;
+ };
+
+ struct MultiplicityCreatorParams {
+ struct GeometricParams {
+ double lambda;
+ };
+
+ enum class MultiplicityCreatorMethod { Geometric };
+ MultiplicityCreatorMethod method;
+ GeometricParams geometric_params;
+ };
+
+ struct ProductiveParams {
+ double productive_part;
+ };
+
+ MetarootSimulationParams metaroot_simulation_params;
+ MultiplicityCreatorParams multiplicity_creator_params;
+ ProductiveParams productive_params;
+
+ size_t number_of_metaroots;
+ };
+
+ struct ClonalTreeSimulatorParams {
+ struct TreeSizeGeneratorParams {
+ struct GeometricParams {
+ double lambda;
+ };
+
+ enum class TreeSizeGeneratorMethod { Geometric };
+ TreeSizeGeneratorMethod method;
+ GeometricParams geometric_params;
+ };
+
+ struct SHM_CreatorParams {
+ struct PoissonCreatorParams {
+ double lambda;
+ };
+
+ enum class SHM_CreatorMethod { Poisson };
+ SHM_CreatorMethod method;
+ PoissonCreatorParams poisson_params;
+ };
+
+ // Set from the config values "uniform", "wide" and "deep" respectively.
+ enum class PoolManagerStrategy { UniformPoolManager, WideTreePoolManager, DeepTreePoolManager };
+ PoolManagerStrategy pool_manager_strategy;
+
+ double prob_ret_to_pool;
+ double lambda_distr_n_children;
+ TreeSizeGeneratorParams tree_size_generator_params;
+ SHM_CreatorParams shm_creator_params;
+ };
+
+ BaseRepertoireParams base_repertoire_params;
+ ClonalTreeSimulatorParams clonal_tree_simulator_params;
+ };
+
+ IOParams io_params;
+ germline_utils::GermlineParams germline_params;
+ SimulationParams simulation_params;
+};
+
+using BaseRepertoireParams = IgSimulatorConfig::SimulationParams::BaseRepertoireParams;
+using ClonalTreeSimulatorParams = IgSimulatorConfig::SimulationParams::ClonalTreeSimulatorParams;
+
+using MetarootSimulationParams = BaseRepertoireParams::MetarootSimulationParams;
+using MultiplicityCreatorParams = BaseRepertoireParams::MultiplicityCreatorParams;
+using ProductiveParams = BaseRepertoireParams::ProductiveParams;
+
+using MultiplicityCreatorMethod = MultiplicityCreatorParams::MultiplicityCreatorMethod;
+
+using GeneChooserParams = MetarootSimulationParams::GeneChooserParams;
+using GeneChooserMethod = GeneChooserParams::GeneChooserMethod;
+
+using NucleotidesRemoverParams = MetarootSimulationParams::NucleotidesRemoverParams;
+using NucleotidesRemoverMethod = NucleotidesRemoverParams::NucleotidesRemoverMethod;
+
+using PNucleotidesCreatorParams = MetarootSimulationParams::PNucleotidesCreatorParams;
+using PNucleotidesCreatorMethod = PNucleotidesCreatorParams::PNucleotidesCreatorMethod;
+
+using NNucleotidesInserterParams = MetarootSimulationParams::NNucleotidesInserterParams;
+using NNucleotidesInserterMethod = NNucleotidesInserterParams::NNucleotidesInserterMethod;
+
+using CleavageParams = MetarootSimulationParams::CleavageParams;
+
+using TreeSizeGeneratorParams = ClonalTreeSimulatorParams::TreeSizeGeneratorParams;
+using SHM_CreatorParams = ClonalTreeSimulatorParams::SHM_CreatorParams;
+using PoolManagerStrategy = ClonalTreeSimulatorParams::PoolManagerStrategy;
+
+void load(IgSimulatorConfig &cfg, std::string const &filename);
+
+typedef config_common::config igs_cfg;
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/ig_simulator_launch.cpp b/src/ig_simulator/ig_simulator_launch.cpp
new file mode 100644
index 00000000..a4d101e9
--- /dev/null
+++ b/src/ig_simulator/ig_simulator_launch.cpp
@@ -0,0 +1,122 @@
+//
+// Created by Andrew Bzikadze on 3/15/17.
+//
+
+#include
+
+#include
+#include
+#include "ig_simulator_launch.hpp"
+#include "base_repertoire/base_repertoire_simulator.hpp"
+#include "clonal_trees/tree_creator/forest_storage_creator.hpp"
+
+using namespace germline_utils;
+
+namespace ig_simulator {
+
+// Converts the configured loci into chain types and returns the only one.
+// VERIFY_MSG fails unless the conversion yields exactly one chain type.
+germline_utils::ChainType IgSimulatorLaunch::GetLaunchChainType() const {
+    const auto chain_types =
+        germline_utils::LociParam::ConvertIntoChainTypes(config_.germline_params.loci);
+    VERIFY_MSG(chain_types.size() == 1, "Only specific chain type is allowed");
+    return chain_types[0];
+}
+
+// Builds the germline gene databases for this launch.
+// All three databases (variable, diversity, join) are generated; the returned
+// vector holds V, then D only when chain_type.IsVDJ() is true, then J.
+// NOTE(review): the element type of std::vector appears to be missing in this text.
+std::vector
+IgSimulatorLaunch::GetDB(const germline_utils::ChainType chain_type) const
+{
+ GermlineDbGenerator db_generator(config_.io_params.input_params.germline_input,
+ config_.germline_params);
+ INFO("Generation of DB for variable segments...");
+ germline_utils::CustomGeneDatabase v_db = db_generator.GenerateVariableDb();
+ INFO("Generation of DB for diversity segments...");
+ germline_utils::CustomGeneDatabase d_db = db_generator.GenerateDiversityDb();
+ INFO("Generation of DB for join segments...");
+ germline_utils::CustomGeneDatabase j_db = db_generator.GenerateJoinDb();
+
+ std::vector db;
+ db.emplace_back(std::move(v_db));
+ // The diversity database is generated above regardless, but only kept for VDJ chains.
+ if (chain_type.IsVDJ())
+ db.emplace_back(std::move(d_db));
+ db.emplace_back(std::move(j_db));
+ return db;
+}
+
+// Simulates the base repertoire and writes it to disk.
+// number_of_metaroots from the config is passed to Simulate(); the result is printed
+// to two files inside output_dir (base_repertoire_filename and base_repertoire_info)
+// and then returned.
+BaseRepertoire
+IgSimulatorLaunch::GetBaseRepertoire(const germline_utils::ChainType chain_type,
+ std::vector& db) const
+{
+ INFO("== Base Repertoire starts ==");
+ BaseRepertoireSimulator base_repertoire_simulator{config_.simulation_params.base_repertoire_params,
+ chain_type,
+ db};
+ auto base_repertoire =
+ base_repertoire_simulator.Simulate(config_.simulation_params.base_repertoire_params.number_of_metaroots);
+ std::ofstream base_repertoire_fasta;
+ std::ofstream base_repertoire_info;
+ // NOTE(review): neither stream is checked after open(); a failure to open would go
+ // unnoticed here -- confirm whether print_base_repertoire handles that.
+ base_repertoire_fasta.open(path::append_path(config_.io_params.output_params.output_dir,
+ config_.io_params.output_params.base_repertoire_filename));
+ base_repertoire_info.open(path::append_path(config_.io_params.output_params.output_dir,
+ config_.io_params.output_params.base_repertoire_info));
+ print_base_repertoire(base_repertoire, base_repertoire_fasta, base_repertoire_info);
+ base_repertoire_fasta.close();
+ base_repertoire_info.close();
+ INFO("== Base Repertoire ends ==");
+ return base_repertoire;
+}
+
+// Generates the forest storage for base_repertoire and exports it.
+// Steps: build a ForestStorageCreator from the VJ finder config (taken from the CDR
+// labeler config) and the clonal tree parameters, generate the forest, write the full
+// and filtered pools to files in output_dir, then export the edge lists.
+// NOTE(review): the template parameter list appears to be missing in this text, and
+// identifiers starting with a double underscore are reserved in C++ -- consider renaming.
+template
+ForestStorage IgSimulatorLaunch::__GetForestStorage(const BaseRepertoire& base_repertoire) const
+{
+ INFO("== Forest Storage generation starts ==");
+ const auto& vjf_config = config_.simulation_params.base_repertoire_params.metaroot_simulation_params.
+ cdr_labeler_config.vj_finder_config;
+ ForestStorageCreator forest_storage_creator(vjf_config,
+ config_.simulation_params.clonal_tree_simulator_params);
+ auto forest_storage = forest_storage_creator.GenerateForest(base_repertoire);
+ INFO("== Forest Storage generation ends ==");
+
+ INFO("== Forest Storage export starts ==");
+ INFO("== Full and filtered pool export start");
+ std::ofstream full, included;
+ // NOTE(review): the streams are not checked after open().
+ full.open(path::append_path(config_.io_params.output_params.output_dir,
+ config_.io_params.output_params.full_pool));
+ included.open(path::append_path(config_.io_params.output_params.output_dir,
+ config_.io_params.output_params.filtered_pool));
+ ForestStorageExporter(forest_storage, full, included);
+ full.close();
+ included.close();
+ INFO("== Full and filtered pool export ends");
+
+ INFO("== Edge lists export starts");
+ EdgeListsExporters(forest_storage, config_.io_params.output_params);
+ INFO("== Edge lists export ends");
+ INFO("== Forest Storage export ends ==");
+ return forest_storage;
+}
+
+// Dispatches on the configured pool manager strategy and forwards to
+// __GetForestStorage. Reaching the end without a match trips VERIFY(false).
+// NOTE(review): the three calls below are textually identical; the template argument
+// that distinguishes them appears to be missing in this text -- confirm against the
+// repository.
+ForestStorage IgSimulatorLaunch::GetForestStorage(const BaseRepertoire& base_repertoire) const
+{
+ const auto& pool_manager_strategy = config_.simulation_params.clonal_tree_simulator_params.pool_manager_strategy;
+ if (pool_manager_strategy == PoolManagerStrategy::UniformPoolManager) {
+ return __GetForestStorage(base_repertoire);
+ } else if (pool_manager_strategy == PoolManagerStrategy::DeepTreePoolManager) {
+ return __GetForestStorage(base_repertoire);
+ } else if (pool_manager_strategy == PoolManagerStrategy::WideTreePoolManager) {
+ return __GetForestStorage(base_repertoire);
+ }
+ VERIFY(false);
+}
+
+// Runs the whole pipeline: determine the chain type, build the germline databases,
+// simulate the base repertoire, then generate the forest storage. The results are
+// only kept in locals; the file output happens inside the called helpers.
+void IgSimulatorLaunch::Run() {
+ // Fixed seeding is left disabled.
+ // MTSingleton::SetSeed(1);
+ INFO("== IgSimulator starts ==");
+
+ germline_utils::ChainType chain_type = GetLaunchChainType();
+ std::vector db { GetDB(chain_type) };
+
+ const BaseRepertoire base_repertoire = GetBaseRepertoire(chain_type, db);
+ const ForestStorage forest_storage = GetForestStorage(base_repertoire);
+
+ INFO("== IgSimulator ends ==");
+}
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/ig_simulator_launch.hpp b/src/ig_simulator/ig_simulator_launch.hpp
new file mode 100644
index 00000000..f61323ce
--- /dev/null
+++ b/src/ig_simulator/ig_simulator_launch.hpp
@@ -0,0 +1,47 @@
+//
+// Created by Andrew Bzikadze on 3/15/17.
+//
+
+#pragma once
+
+#include "ig_simulator_config.hpp"
+#include "germline_utils/chain_type.hpp"
+#include "base_repertoire/base_repertoire.hpp"
+#include "clonal_trees/forest/forest.hpp"
+
+namespace ig_simulator {
+
+// Driver object for one simulator run. It stores its own copy of the configuration
+// and exposes a single entry point, Run(). Default construction, copying and moving
+// are all disabled.
+class IgSimulatorLaunch {
+private:
+ // Copy of the configuration passed to the constructor.
+ IgSimulatorConfig config_;
+
+private:
+ // Pipeline steps invoked in this order by Run().
+ germline_utils::ChainType GetLaunchChainType() const;
+
+ std::vector
+ GetDB(const germline_utils::ChainType chain_type) const;
+
+ BaseRepertoire
+ GetBaseRepertoire(const germline_utils::ChainType chain_type,
+ std::vector& db) const;
+
+ // Implementation behind GetForestStorage.
+ template
+ ForestStorage __GetForestStorage(const BaseRepertoire& base_repertoire) const;
+
+ ForestStorage GetForestStorage(const BaseRepertoire& base_repertoire) const;
+
+public:
+ IgSimulatorLaunch(const IgSimulatorConfig &config) :
+ config_(config)
+ { }
+
+ void Run();
+
+ IgSimulatorLaunch() = delete;
+ IgSimulatorLaunch(const IgSimulatorLaunch&) = delete;
+ IgSimulatorLaunch(IgSimulatorLaunch&&) = delete;
+ IgSimulatorLaunch& operator=(const IgSimulatorLaunch&) = delete;
+ IgSimulatorLaunch& operator=(IgSimulatorLaunch&&) = delete;
+};
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/ig_simulator_utils.hpp b/src/ig_simulator/ig_simulator_utils.hpp
new file mode 100644
index 00000000..88c0263f
--- /dev/null
+++ b/src/ig_simulator/ig_simulator_utils.hpp
@@ -0,0 +1,38 @@
+//
+// Created by Andrew Bzikadze on 3/31/17.
+//
+
+#pragma once
+
+#include "verify.hpp"
+
+namespace ig_simulator {
+
+// Asserts via VERIFY that p compares unequal to nullptr and returns p unchanged,
+// so the check can be used inline in an initializer.
+template<class Pointer>
+const Pointer& check_pointer(const Pointer& p) {
+    VERIFY(p != nullptr);
+    return p;
+}
+
+// Asserts via VERIFY that x >= 0 and returns x unchanged.
+// T must be an arithmetic type (enforced at compile time).
+template<class T>
+T check_numeric_nonnegative(const T x) {
+    static_assert(std::is_arithmetic<T>::value, "Type has to be arithmetic");
+    VERIFY(x >= 0);
+    return x;
+}
+
+// Asserts via VERIFY that x > 0 and returns x unchanged.
+// T must be an arithmetic type (enforced at compile time).
+template<class T>
+T check_numeric_positive(const T x) {
+    static_assert(std::is_arithmetic<T>::value, "Type has to be arithmetic");
+    VERIFY(x > 0);
+    return x;
+}
+
+// Asserts via VERIFY that x lies in the closed interval [0, 1] and returns x
+// unchanged. T must be a floating-point type (enforced at compile time).
+template<class T>
+T check_probability(const T x) {
+    static_assert(std::is_floating_point<T>::value, "Probability is floating point");
+    VERIFY(x >= 0 and x <= 1);
+    return x;
+}
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/main.cpp b/src/ig_simulator/main.cpp
new file mode 100644
index 00000000..df8bc959
--- /dev/null
+++ b/src/ig_simulator/main.cpp
@@ -0,0 +1,73 @@
+#include
+#include
+#include
+
+#include
+
+#include "ig_simulator_config.hpp"
+#include "ig_simulator_launch.hpp"
+
+// Creates a logger, adds a writer to it and attaches it.
+// The log properties file name comes from the loaded config; if that path does not
+// exist it is retried relative to the directory of cfg_filename. If it still does
+// not exist, create_logger receives an empty string.
+// NOTE(review): the template argument of std::make_shared appears to be missing in
+// this text.
+void create_console_logger(std::string cfg_filename) {
+ using namespace logging;
+ std::string log_props_file = ig_simulator::igs_cfg::get().io_params.output_params.log_filename;
+ if (!path::FileExists(log_props_file)){
+ log_props_file = path::append_path(path::parent_path(cfg_filename), log_props_file);
+ }
+ logger *lg = create_logger(path::FileExists(log_props_file) ? log_props_file : "");
+ lg->add_writer(std::make_shared());
+ attach_logger(lg);
+}
+
+// Renders the elapsed time reported by pc as "<h> hours <m> minutes <s> seconds".
+// The millisecond count is truncated to whole seconds; hours are not wrapped.
+std::string running_time_format(const perf_counter &pc) {
+    const unsigned total_ms = static_cast<unsigned>(pc.time_ms());
+    const unsigned total_secs = total_ms / 1000;
+    const unsigned total_mins = total_secs / 60;
+
+    const unsigned hours = total_mins / 60;
+    const unsigned mins = total_mins % 60;
+    const unsigned secs = total_secs % 60;
+
+    return (boost::format("%u hours %u minutes %u seconds") % hours % mins % secs).str();
+}
+
+// Calls path::make_dir on the configured output directory (of.output_dir).
+void prepare_output_dir(const ig_simulator::IgSimulatorConfig::IOParams::OutputParams & of) {
+ path::make_dir(of.output_dir);
+}
+
+// Creates directory `to`, then copies the ".info" and ".properties" files found in
+// the directory of cfg_filename into it.
+void copy_configs(std::string cfg_filename, std::string to) {
+    path::make_dir(to);
+    const auto config_dir = path::parent_path(cfg_filename);
+    path::copy_files_by_ext(config_dir, to, ".info", true);
+    path::copy_files_by_ext(config_dir, to, ".properties", true);
+}
+
+// Picks the config file name: the single command-line argument when exactly one is
+// given (argc == 2), otherwise the default "configs/ig_simulator/config.info".
+std::string get_config_fname(int argc, char **argv) {
+    const bool has_single_argument = (argc == 2);
+    return has_single_argument ? std::string(argv[1])
+                               : std::string("configs/ig_simulator/config.info");
+}
+
+// Resolves the config file name, loads it into the igs_cfg singleton and prepares
+// the output directory. The config directory's files are copied into
+// "<output_dir>/configs". Returns the config file name that was used.
+// If the file does not exist, a message is printed and the process exits with -1.
+std::string load_config(int argc, char **argv) {
+    std::string cfg_filename = get_config_fname(argc, argv);
+    const bool config_found = path::FileExists(cfg_filename);
+    if (!config_found) {
+        std::cout << "File " << cfg_filename << " doesn't exist or can't be read!" << std::endl;
+        exit(-1);
+    }
+
+    ig_simulator::igs_cfg::create_instance(cfg_filename);
+    const auto &output_params = ig_simulator::igs_cfg::get().io_params.output_params;
+    prepare_output_dir(output_params);
+
+    std::string path_to_copy = path::append_path(output_params.output_dir, "configs");
+    path::make_dir(path_to_copy);
+    copy_configs(cfg_filename, path_to_copy);
+    return cfg_filename;
+}
+
+// Program entry point: limits OpenMP to one thread, loads the configuration, sets up
+// logging and runs the simulator. Always returns 0 when Run() returns.
+int main(int argc, char **argv) {
+ omp_set_num_threads(1);
+
+ segfault_handler sh;
+ // NOTE(review): pc is never read and running_time_format is never called in this
+ // function -- presumably a running-time log line was intended; confirm.
+ perf_counter pc;
+ std::string cfg_filename = load_config(argc, argv);
+ create_console_logger(cfg_filename);
+ // variable extracted to avoid a possible bug in gcc 4.8.4
+ const auto& cfg = ig_simulator::igs_cfg::get();
+ ig_simulator::IgSimulatorLaunch(cfg).Run();
+ return 0;
+}
diff --git a/src/ig_simulator/random_generator.hpp b/src/ig_simulator/random_generator.hpp
new file mode 100644
index 00000000..52216315
--- /dev/null
+++ b/src/ig_simulator/random_generator.hpp
@@ -0,0 +1,35 @@
+//
+// Created by Andrew Bzikadze on 3/17/17.
+//
+
+#pragma once
+
+#include
+
+namespace ig_simulator {
+
+// This code is written after consulting with @eodus
+// Holds one random engine per template instantiation, created on first use as a
+// function-local static inside GetInstance(). Unless SetSeed is called, the engine is
+// seeded from std::random_device.
+// NOTE(review): the template parameter list appears to be missing in this text; the
+// body uses the names STLRandomGenerator and Sseq.
+template
+class RandomGeneratorSingleton {
+private:
+ STLRandomGenerator generator_;
+
+private:
+ // Private so that the only instance is the static one in GetInstance().
+ RandomGeneratorSingleton(Sseq seed=std::random_device()()) :
+ generator_(seed)
+ { }
+
+public:
+ // Re-seeds the shared engine; without an argument a fresh std::random_device value is used.
+ static void SetSeed(Sseq seed = std::random_device()()) {
+ RandomGeneratorSingleton::GetInstance().seed(seed);
+ }
+
+ // Returns a reference to the shared engine itself, not to the wrapper object.
+ static STLRandomGenerator& GetInstance() {
+ static RandomGeneratorSingleton rg;
+ return rg.generator_;
+ }
+};
+
+using MTSingleton = RandomGeneratorSingleton;
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/simulation_routines.cpp b/src/ig_simulator/simulation_routines.cpp
new file mode 100644
index 00000000..58578ba2
--- /dev/null
+++ b/src/ig_simulator/simulation_routines.cpp
@@ -0,0 +1,21 @@
+//
+// Created by Andrew Bzikadze on 3/20/17.
+//
+
+#include "simulation_routines.hpp"
+
+namespace ig_simulator {
+
+// Draws an integer from a std::uniform_int_distribution constructed with (low, high),
+// i.e. both bounds are inclusive, using the shared MTSingleton engine.
+// NOTE(review): low <= high is not checked here; callers must guarantee it.
+size_t random_index(size_t low, size_t high) {
+ std::uniform_int_distribution d(low, high);
+ return d(MTSingleton::GetInstance());
+}
+
+// Draws a value from a std::uniform_real_distribution constructed with (low, high),
+// using the shared MTSingleton engine. Only floating-point types are accepted.
+// NOTE(review): this template is defined in a .cpp file and no explicit instantiation
+// is visible here, so other translation units may fail to link against it -- confirm.
+template
+double uniform_floating_point(FloatingPoint low, FloatingPoint high) {
+ static_assert(std::is_floating_point::value, "Type has to be floating point");
+ std::uniform_real_distribution d(low, high);
+ return d(MTSingleton::GetInstance());
+}
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/simulation_routines.hpp b/src/ig_simulator/simulation_routines.hpp
new file mode 100644
index 00000000..cab69fff
--- /dev/null
+++ b/src/ig_simulator/simulation_routines.hpp
@@ -0,0 +1,18 @@
+//
+// Created by Andrew Bzikadze on 3/20/17.
+//
+
+#pragma once
+
+#include
+#include
+#include "random_generator.hpp"
+
+namespace ig_simulator {
+
+// Returns a random integer between low and high; by default low is 0 and high is the
+// numeric_limits maximum.
+size_t random_index(size_t low = 0, size_t high = std::numeric_limits::max());
+
+// Returns a random floating-point value between low and high.
+// NOTE(review): both defaults are 0., which makes the default range degenerate;
+// presumably high was meant to default to 1. -- confirm before relying on the defaults.
+template
+double uniform_floating_point(FloatingPoint low = 0., FloatingPoint high = 0.);
+
+} // End namespace ig_simulator
diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt
index f8abc2a0..39bbd75d 100644
--- a/src/test/CMakeLists.txt
+++ b/src/test/CMakeLists.txt
@@ -5,6 +5,7 @@ include_directories(${VDJ_UTILS_DIR})
include_directories(${ALGORITHMS_DIR})
include_directories(${VJ_FINDER_DIR})
include_directories(${CDR_LABELER_DIR})
+include_directories(${IG_SIMULATOR_DIR})
link_libraries(graph_utils vdj_utils algorithms core input ${COMMON_LIBRARIES})
@@ -23,3 +24,6 @@ target_link_libraries(test_cdr_labeling cdr_labeler_library)
make_test(test_vj_finder test_vj_finder.cpp)
target_link_libraries(test_vj_finder vj_finder_library)
+
+make_test(test_ig_simulator test_ig_simulator.cpp)  # make_test is a project helper defined elsewhere; same call shape as the sibling tests
+target_link_libraries(test_ig_simulator ig_simulator_library)  # plain (keyword-less) signature, kept consistent with the other test targets in this file
diff --git a/src/test/test_cdr_labeler.cpp b/src/test/test_cdr_labeler.cpp
index 71c83592..d6edc741 100644
--- a/src/test/test_cdr_labeler.cpp
+++ b/src/test/test_cdr_labeler.cpp
@@ -4,7 +4,7 @@
#include
#include
-#include
+#include
#include
#include
#include
@@ -28,8 +28,8 @@ class CDRLabelerTest : public ::testing::Test {
std::string config_fname = "configs/cdr_labeler/config.info";
config.load(config_fname);
config.vj_finder_config.algorithm_params.germline_params.loci = "IG";
- vj_finder::GermlineDbGenerator db_generator(config.vj_finder_config.io_params.input_params.germline_input,
- config.vj_finder_config.algorithm_params.germline_params);
+ germline_utils::GermlineDbGenerator db_generator(config.vj_finder_config.io_params.input_params.germline_input,
+ config.vj_finder_config.algorithm_params.germline_params);
auto v_gene_database = db_generator.GenerateVariableDb();
auto j_gene_database = db_generator.GenerateJoinDb();
auto v_labeling = cdr_labeler::GermlineDbLabeler(v_gene_database, config.cdrs_params).ComputeLabeling();
diff --git a/src/test/test_ig_simulator.cpp b/src/test/test_ig_simulator.cpp
new file mode 100644
index 00000000..1e50a7cc
--- /dev/null
+++ b/src/test/test_ig_simulator.cpp
@@ -0,0 +1,302 @@
+//
+// Created by Andrew Bzikadze on 3/23/17.
+//
+
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+#include "base_repertoire/metaroot/metaroot.hpp"
+#include
+#include "convert.hpp"
+
+#include "base_repertoire/gene_chooser/uniform_gene_chooser.hpp"
+#include "base_repertoire/nucleotides_remover/uniform_nucleotides_remover.hpp"
+#include "base_repertoire/p_nucleotides_creator/uniform_nucleotides_creator.hpp"
+#include "base_repertoire/n_nucleotides_inserter/uniform_n_nucleotides_inserter.hpp"
+#include "base_repertoire/metaroot_creator/metaroot_creator.hpp"
+#include "annotation_utils/cdr_labeling_primitives.hpp"
+#include "base_repertoire/productivity_checker/productivity_checker.hpp"
+#include "annotation_utils/aa_annotation/aa_calculator.hpp"
+
+#include
+#include
+
+void create_console_logger() {  // creates a logger, gives it one writer and attaches it
+    using namespace logging;
+    logger *lg = create_logger("");
+    lg->add_writer(std::make_shared<console_writer>());  // NOTE(review): writer type restored as logging::console_writer — confirm
+    attach_logger(lg);
+}
+
+ig_simulator::IgSimulatorConfig config;
+germline_utils::CustomGeneDatabase v_db(germline_utils::SegmentType::VariableSegment);
+germline_utils::CustomGeneDatabase d_db(germline_utils::SegmentType::DiversitySegment);
+germline_utils::CustomGeneDatabase j_db(germline_utils::SegmentType::JoinSegment);
+
+namespace ig_simulator {
+
+// Fixture: before each test, loads the simulator config and rebuilds the global V/D/J databases for IGH.
+class IgSimulatorTest : public ::testing::Test {
+public:
+    void SetUp() override {
+        omp_set_num_threads(1);
+        create_console_logger();
+        std::string config_path("configs/ig_simulator/config.info");
+        ig_simulator::load(config, config_path);
+        config.germline_params.loci = "IGH";  // assigned after load(), so it replaces the loaded value
+        auto &germline_input = config.io_params.input_params.germline_input;
+        germline_utils::GermlineDbGenerator generator(germline_input, config.germline_params);
+        v_db = generator.GenerateVariableDb();
+        d_db = generator.GenerateDiversityDb();
+        j_db = generator.GenerateJoinDb();
+    }
+};
+
+// PrepareGene edits the gene in place, so every call below starts from the previous result.
+TEST_F(IgSimulatorTest, PrepareGeneTest) {
+    seqan::Dna5String gene("GTACAACTGGAACG");
+    auto as_text = [&gene]() { return core::seqan_string_to_string(gene); };
+    AbstractMetaroot::PrepareGene(gene, 0, 1);    // expected: one nucleotide gone from the right end
+    ASSERT_EQ(as_text(), "GTACAACTGGAAC");
+    AbstractMetaroot::PrepareGene(gene, 2, 1);    // expected: two gone on the left, one on the right
+    ASSERT_EQ(as_text(), "ACAACTGGAA");
+    AbstractMetaroot::PrepareGene(gene, 5, 3);    // expected: only "TG" is left
+    ASSERT_EQ(as_text(), "TG");
+    AbstractMetaroot::PrepareGene(gene, -2, -2);  // negative counts: expected result grows by two on each side
+    ASSERT_EQ(as_text(), "CATGCA");
+    AbstractMetaroot::PrepareGene(gene, -3, 2);   // mixed: three added on the left, two gone on the right
+    ASSERT_EQ(as_text(), "ATGCATG");
+}
+
+TEST_F(IgSimulatorTest, VDJMetarootSequenceCorrect) { // compares VDJMetaroot::Sequence() with hand-assembled V + VD insertion + D + DJ insertion + J strings
+ { // case 1: positive counts only
+ std::string vd_ins("ACCGT");
+ std::string dj_ins("TTTT");
+ VDJMetaroot root(&v_db, &d_db, &j_db,
+ 0, 0, 0, // presumably the indices of the V, D and J genes in their databases — TODO confirm
+ annotation_utils::CDRLabeling(),
+ 5, 1, 2, 3, // the expected string below lacks 5 nt at the V end, 1 and 2 nt at the D ends, 3 nt at the J start (compare with the all-zero case)
+ vd_ins, dj_ins);
+ std::string correct_root_seq(
+ std::string(
+ "CAGGTTCAGCTGGTGCAGTCTGGAGCTGAGGTGAAGAAGCCTGGGGCCTCAGTGAAGGTCTCCTGCAAGGCTTCTGGTTACACCTTT"
+ "ACCAGCTATGGTATCAGCTGGGTGCGACAGGCCCCTGGACAAGGGCTTGAGTGGATGGGATGGATCAGCGCTTACAATGGTAACACA"
+ "AACTATGCACAGAAGCTCCAGGGCAGAGTCACCATGACCACAGACACATCCACGAGCACAGCCTACATGGAGCTGAGGAGCCTGAGA"
+ "TCTGACGACACGGCCGTGTATTACTGTGCG") +
+ vd_ins +
+ "GTACAACTGGAACG" +
+ dj_ins +
+ "GAATACTTCCAGCACTGGGGCCAGGGCACCCTGGTCACCGTCTCCTCAG");
+ ASSERT_EQ(correct_root_seq, core::dna5String_to_string(root.Sequence()));
+ }
+
+ { // case 2: negative counts only; the genes stay whole and extra nucleotides appear at each junction end
+ std::string vd_ins("ACCGT");
+ std::string dj_ins("TTTT");
+ VDJMetaroot root(&v_db, &d_db, &j_db,
+ 0, 0, 0,
+ annotation_utils::CDRLabeling(),
+ -5, -2, -2, -3, // expected string gains 5, 2, 2 and 3 nt at the V end, D start, D end and J start
+ vd_ins, dj_ins);
+ std::string correct_root_seq(
+ std::string(
+ "CAGGTTCAGCTGGTGCAGTCTGGAGCTGAGGTGAAGAAGCCTGGGGCCTCAGTGAAGGTCTCCTGCAAGGCTTCTGGTTACACCTTT"
+ "ACCAGCTATGGTATCAGCTGGGTGCGACAGGCCCCTGGACAAGGGCTTGAGTGGATGGGATGGATCAGCGCTTACAATGGTAACACA"
+ "AACTATGCACAGAAGCTCCAGGGCAGAGTCACCATGACCACAGACACATCCACGAGCACAGCCTACATGGAGCTGAGGAGCCTGAGA"
+ "TCTGACGACACGGCCGTGTATTACTGTGCGAGAGA") +
+ "TCTCT" + // reverse complement of the last 5 V nucleotides (AGAGA)
+ vd_ins +
+ "CC" + // reverse complement of the first 2 D nucleotides (GG)
+ "GGTACAACTGGAACGAC" +
+ "GT" + // reverse complement of the last 2 D nucleotides (AC)
+ dj_ins +
+ "AGC" + // reverse complement of the first 3 J nucleotides (GCT)
+ "GCTGAATACTTCCAGCACTGGGGCCAGGGCACCCTGGTCACCGTCTCCTCAG");
+ ASSERT_EQ(correct_root_seq, core::dna5String_to_string(root.Sequence()));
+ }
+ { // case 3: a zero count for the D start, negative counts elsewhere
+ std::string vd_ins("ACCGT");
+ std::string dj_ins("TTTT");
+ VDJMetaroot root(&v_db, &d_db, &j_db,
+ 0, 0, 0,
+ annotation_utils::CDRLabeling(),
+ -5, 0, -3, -3, // zero at the D start: the expected string has nothing between vd_ins and the D gene
+ vd_ins, dj_ins);
+ std::string correct_root_seq(
+ std::string(
+ "CAGGTTCAGCTGGTGCAGTCTGGAGCTGAGGTGAAGAAGCCTGGGGCCTCAGTGAAGGTCTCCTGCAAGGCTTCTGGTTACACCTTT"
+ "ACCAGCTATGGTATCAGCTGGGTGCGACAGGCCCCTGGACAAGGGCTTGAGTGGATGGGATGGATCAGCGCTTACAATGGTAACACA"
+ "AACTATGCACAGAAGCTCCAGGGCAGAGTCACCATGACCACAGACACATCCACGAGCACAGCCTACATGGAGCTGAGGAGCCTGAGA"
+ "TCTGACGACACGGCCGTGTATTACTGTGCGAGAGA") +
+ "TCTCT" +
+ vd_ins +
+ "GGTACAACTGGAACGAC" +
+ "GTC" + // reverse complement of the last 3 D nucleotides (GAC)
+ dj_ins +
+ "AGC" +
+ "GCTGAATACTTCCAGCACTGGGGCCAGGGCACCCTGGTCACCGTCTCCTCAG");
+ ASSERT_EQ(correct_root_seq, core::dna5String_to_string(root.Sequence()));
+ }
+
+ { // case 4: all counts zero; the expected string is the three genes joined by the two insertions
+ std::string vd_ins("ACCGT");
+ std::string dj_ins("TTTT");
+ VDJMetaroot root(&v_db, &d_db, &j_db,
+ 0, 0, 0,
+ annotation_utils::CDRLabeling(),
+ 0, 0, 0, 0,
+ vd_ins, dj_ins);
+ std::string correct_root_seq(
+ std::string(
+ "CAGGTTCAGCTGGTGCAGTCTGGAGCTGAGGTGAAGAAGCCTGGGGCCTCAGTGAAGGTCTCCTGCAAGGCTTCTGGTTACACCTTT"
+ "ACCAGCTATGGTATCAGCTGGGTGCGACAGGCCCCTGGACAAGGGCTTGAGTGGATGGGATGGATCAGCGCTTACAATGGTAACACA"
+ "AACTATGCACAGAAGCTCCAGGGCAGAGTCACCATGACCACAGACACATCCACGAGCACAGCCTACATGGAGCTGAGGAGCCTGAGA"
+ "TCTGACGACACGGCCGTGTATTACTGTGCGAGAGA") +
+ vd_ins +
+ "GGTACAACTGGAACGAC" +
+ dj_ins +
+ "GCTGAATACTTCCAGCACTGGGGCCAGGGCACCCTGGTCACCGTCTCCTCAG");
+ ASSERT_EQ(correct_root_seq, core::dna5String_to_string(root.Sequence()));
+ }
+}
+
+// VJMetaroot::Sequence() must equal the V part, the VJ insertion and the J part, in that order.
+// The expected V part ends 5 nt and the J part starts 3 nt short of the strings in the all-zero VDJ case.
+TEST_F(IgSimulatorTest, VJMetarootSequenceCorrect) {
+    std::string insertion("ACCGT");
+    VJMetaroot metaroot(&v_db, &j_db, 0, 0, annotation_utils::CDRLabeling(), 5, 3, insertion);
+
+    // V gene part of the expected sequence
+    std::string expected(
+        "CAGGTTCAGCTGGTGCAGTCTGGAGCTGAGGTGAAGAAGCCTGGGGCCTCAGTGAAGGTCTCCTGCAAGGCTTCTGGTTACACCTTT"
+        "ACCAGCTATGGTATCAGCTGGGTGCGACAGGCCCCTGGACAAGGGCTTGAGTGGATGGGATGGATCAGCGCTTACAATGGTAACACA"
+        "AACTATGCACAGAAGCTCCAGGGCAGAGTCACCATGACCACAGACACATCCACGAGCACAGCCTACATGGAGCTGAGGAGCCTGAGA"
+        "TCTGACGACACGGCCGTGTATTACTGTGCG");
+    expected += insertion;
+    // J gene part of the expected sequence
+    expected += "GAATACTTCCAGCACTGGGGCCAGGGCACCCTGGTCACCGTCTCCTCAG";
+
+    ASSERT_EQ(expected, core::dna5String_to_string(metaroot.Sequence()));
+}
+
+//TEST_F(IgSimulatorTest, MetarootCreaterSpeedTest) {
+// {
+// config.algorithm_params.germline_params.loci = "IGH";
+//
+// germline_utils::GermlineDbGenerator db_generator(config.io_params.input_params.germline_input,
+// config.algorithm_params.germline_params);
+// v_db = db_generator.GenerateVariableDb();
+// d_db = db_generator.GenerateDiversityDb();
+// j_db = db_generator.GenerateJoinDb();
+//
+// VDJMetarootCreator metaroot_creator(config.simulation_params.base_repertoire_params.metaroot_simulation_params,
+// &v_db, &d_db, &j_db);
+//
+// auto t1 = std::chrono::high_resolution_clock::now();
+// size_t N((int) 1e5);
+// for (size_t i = 0; i < N; ++i) {
+// auto root = metaroot_creator.Createroot();
+// }
+// auto t2 = std::chrono::high_resolution_clock::now();
+// std::chrono::duration fp = t2 - t1;
+// std::cout << "Simulation of " << N << " VDJ metaroots took " << fp.count() << "ms" << std::endl;
+// }
+//
+// {
+// config.algorithm_params.germline_params.loci = "IGL";
+//
+// germline_utils::GermlineDbGenerator db_generator(config.io_params.input_params.germline_input,
+// config.algorithm_params.germline_params);
+// v_db = db_generator.GenerateVariableDb();
+// INFO("Generation of DB for join segments...");
+// j_db = db_generator.GenerateJoinDb();
+//
+// VDJMetarootCreator metaroot_creator(config.simulation_params.base_repertoire_params.metaroot_simulation_params,
+// &v_db, &d_db, &j_db);
+//
+// auto t1 = std::chrono::high_resolution_clock::now();
+// size_t N((int) 1e5);
+// for (size_t i = 0; i < N; ++i) {
+// metaroot_creator.Createroot()->Sequence();
+// }
+// auto t2 = std::chrono::high_resolution_clock::now();
+// std::chrono::duration fp = t2 - t1;
+// std::cout << "Simulation of " << N << " VJ metaroots took " << fp.count() << "ms" << std::endl;
+// }
+//}
+
+// TEST_F(IgSimulatorTest, MetarootCreaterCDRTest) {
+// {
+// config.algorithm_params.germline_params.loci = "IGH";
+//
+// germline_utils::GermlineDbGenerator db_generator(config.io_params.input_params.germline_input,
+// config.algorithm_params.germline_params);
+// v_db = db_generator.GenerateVariableDb();
+// d_db = db_generator.GenerateDiversityDb();
+// j_db = db_generator.GenerateJoinDb();
+//
+// std::vector db;
+// db.emplace_back(std::move(v_db));
+// db.emplace_back(std::move(d_db));
+// db.emplace_back(std::move(j_db));
+//
+// VDJMetarootCreator metaroot_creator(config.simulation_params.base_repertoire_params.metaroot_simulation_params,
+// db);
+//
+// MTSingleton::SetSeed(7);
+// auto root = metaroot_creator.Createroot();
+// INFO(*root);
+// INFO(root->Sequence());
+// ASSERT_EQ(root->CDRLabeling().cdr1.start_pos, 75);
+// ASSERT_EQ(root->CDRLabeling().cdr1.end_pos, 98);
+// ASSERT_EQ(root->CDRLabeling().cdr2.start_pos, 150);
+// ASSERT_EQ(root->CDRLabeling().cdr2.end_pos, 173);
+// ASSERT_EQ(root->CDRLabeling().cdr3.start_pos, 288);
+// ASSERT_EQ(root->CDRLabeling().cdr3.end_pos, 366);
+// }
+// }
+
+// Smoke/timing test: creates N VDJ metaroots and prints how many the productivity checker accepts.
+TEST_F(IgSimulatorTest, ProductiveChecker) {
+    config.germline_params.loci = "IGH";
+    germline_utils::GermlineDbGenerator db_generator(config.io_params.input_params.germline_input,
+                                                     config.germline_params);
+    v_db = db_generator.GenerateVariableDb();
+    d_db = db_generator.GenerateDiversityDb();
+    j_db = db_generator.GenerateJoinDb();
+
+    // The creator receives the three databases as one vector. The globals are moved from here;
+    // the fixture's SetUp assigns them again before the next test runs.
+    std::vector<germline_utils::CustomGeneDatabase> db;
+    db.emplace_back(std::move(v_db));
+    db.emplace_back(std::move(d_db));
+    db.emplace_back(std::move(j_db));
+
+    ProductivityChecker productivity_checker(
+        std::unique_ptr<annotation_utils::BaseAACalculator>(new annotation_utils::SimpleAACalculator));
+
+    VDJMetarootCreator metaroot_creator(config.simulation_params.base_repertoire_params.metaroot_simulation_params,
+                                        db);
+
+    auto t1 = std::chrono::high_resolution_clock::now();
+    size_t N((int) 1e4);
+    size_t prod = 0;
+    for (size_t i = 0; i < N; ++i) {
+        auto root = metaroot_creator.Createroot();
+        if (productivity_checker.IsProductive(root)) {
+            prod++;
+        }
+    }
+    std::cout << prod << " / " << N << std::endl;
+    auto t2 = std::chrono::high_resolution_clock::now();
+    std::chrono::duration<double, std::milli> fp = t2 - t1;  // milliseconds, to match the "ms" label printed below
+    std::cout << "Simulation of " << N << " VDJ metaroots took " << fp.count() << "ms" << std::endl;
+}
+
+} // End namespace ig_simulator
diff --git a/src/test/test_vj_finder.cpp b/src/test/test_vj_finder.cpp
index 739cd727..3c830493 100644
--- a/src/test/test_vj_finder.cpp
+++ b/src/test/test_vj_finder.cpp
@@ -4,7 +4,7 @@
#include
#include
-#include
+#include
#include
#include
@@ -105,8 +105,8 @@ TEST_F(VJFinderTest, BaseVJFinderTest) {
vj_finder_config.algorithm_params.fix_crop_fill_params.fill_right = true;
vj_finder_config.algorithm_params.fix_crop_fill_params.fix_right = 3;
read_archive.ExtractFromFile("test_dataset/vj_finder_test.fastq");
- vj_finder::GermlineDbGenerator db_generator(vj_finder_config.io_params.input_params.germline_input,
- vj_finder_config.algorithm_params.germline_params);
+ germline_utils::GermlineDbGenerator db_generator(vj_finder_config.io_params.input_params.germline_input,
+ vj_finder_config.algorithm_params.germline_params);
auto v_gene_database = db_generator.GenerateVariableDb();
auto j_gene_database = db_generator.GenerateJoinDb();
vj_finder::VJParallelProcessor processor(read_archive,
diff --git a/src/umi_experiments/CMakeLists.txt b/src/umi_experiments/CMakeLists.txt
index c5daee39..7041899e 100644
--- a/src/umi_experiments/CMakeLists.txt
+++ b/src/umi_experiments/CMakeLists.txt
@@ -16,20 +16,20 @@ file(GLOB HEADER_FILES **/*.hpp)
#set(CMAKE_BUILD_TYPE "Debug")
-add_executable(check_graph_symmetry tools/check_graph_symmetry.cpp utils.cpp)
-add_executable(umi_to_fastq tools/umi_to_fastq.cpp utils.cpp umi_utils.cpp)
+add_executable(check_graph_symmetry tools/check_graph_symmetry.cpp ig_simulator_utils.cpp)
+add_executable(umi_to_fastq tools/umi_to_fastq.cpp ig_simulator_utils.cpp umi_utils.cpp)
add_executable(simulate_tiny_dataset tools/simulate_tiny_dataset.cpp)
-add_executable(umi_graph tools/umi_graph.cpp utils.cpp umi_utils.cpp)
-add_executable(analyze_intermed_clusters tools/analyze_intermed_clusters.cpp utils.cpp)
-add_executable(find_bad_cluster tools/find_bad_cluster.cpp utils.cpp clusterer.cpp)
+add_executable(umi_graph tools/umi_graph.cpp ig_simulator_utils.cpp umi_utils.cpp)
+add_executable(analyze_intermed_clusters tools/analyze_intermed_clusters.cpp ig_simulator_utils.cpp)
+add_executable(find_bad_cluster tools/find_bad_cluster.cpp ig_simulator_utils.cpp clusterer.cpp)
add_executable(report_pcr_error_rate tools/report_pcr_error_rate.cpp tools/error_analyzer.cpp tools/error_analyzer.hpp umi_utils.cpp utils/io.cpp)
-add_executable(reads_by_umi_stats stats/reads_by_umi_stats.cpp utils.cpp umi_utils.cpp stats/dist_distribution_stats.cpp)
-add_executable(print_graph_decomposition_stats stats/print_graph_decomposition_stats.cpp utils.cpp)
-add_executable(pairwise_dist_stats stats/pairwise_dist_stats.cpp utils/io.cpp clusterer.cpp utils.cpp)
-add_executable(dists_inside_clusters stats/dists_inside_clusters.cpp utils/io.cpp clusterer.cpp utils.cpp)
+add_executable(reads_by_umi_stats stats/reads_by_umi_stats.cpp ig_simulator_utils.cpp umi_utils.cpp stats/dist_distribution_stats.cpp)
+add_executable(print_graph_decomposition_stats stats/print_graph_decomposition_stats.cpp ig_simulator_utils.cpp)
+add_executable(pairwise_dist_stats stats/pairwise_dist_stats.cpp utils/io.cpp clusterer.cpp ig_simulator_utils.cpp)
+add_executable(dists_inside_clusters stats/dists_inside_clusters.cpp utils/io.cpp clusterer.cpp ig_simulator_utils.cpp)
add_executable(umi_correction_stats stats/umi_correction_stats.cpp umi_utils.cpp utils/io.cpp)
-add_executable(umi_naive naive/umi_naive.cpp ${HEADER_FILES} utils.cpp umi_utils.cpp clusterer.cpp utils/io.cpp)
+add_executable(umi_naive naive/umi_naive.cpp ${HEADER_FILES} ig_simulator_utils.cpp umi_utils.cpp clusterer.cpp utils/io.cpp)
add_executable(report_umi_abundance report_umi_abundance.cpp utils/io.cpp)
-add_executable(cluster_reads cluster_reads.cpp ${HEADER_FILES} utils.cpp umi_utils.cpp clusterer.cpp utils/io.cpp ../fast_ig_tools/fast_ig_tools.cpp)
+add_executable(cluster_reads cluster_reads.cpp ${HEADER_FILES} ig_simulator_utils.cpp umi_utils.cpp clusterer.cpp utils/io.cpp ../fast_ig_tools/fast_ig_tools.cpp)
diff --git a/src/umi_experiments/utils.cpp b/src/umi_experiments/ig_simulator_utils.cpp
similarity index 100%
rename from src/umi_experiments/utils.cpp
rename to src/umi_experiments/ig_simulator_utils.cpp
diff --git a/src/vdj_utils/CMakeLists.txt b/src/vdj_utils/CMakeLists.txt
index 75bcd355..fbf146b9 100644
--- a/src/vdj_utils/CMakeLists.txt
+++ b/src/vdj_utils/CMakeLists.txt
@@ -8,6 +8,7 @@ add_library(vdj_utils STATIC
germline_utils/lymphocyte_type.cpp
germline_utils/chain_type.cpp
germline_utils/germline_gene_type.cpp
+ germline_utils/germline_db_generator.cpp
germline_utils/germline_databases/immune_gene_database.cpp
germline_utils/germline_databases/chain_database.cpp
germline_utils/germline_databases/custom_gene_database.cpp
@@ -21,6 +22,6 @@ add_library(vdj_utils STATIC
annotation_utils/annotated_clone.cpp
annotation_utils/annotated_clone_calculator.cpp
annotation_utils/annotated_clone_set.cpp
- )
+ germline_utils/germline_config.cpp)
target_link_libraries(vdj_utils core ${COMMON_LIBRARIES})
diff --git a/src/vdj_utils/annotation_utils/aa_annotation/aa_calculator.cpp b/src/vdj_utils/annotation_utils/aa_annotation/aa_calculator.cpp
index 0a4dc635..08dd6b39 100644
--- a/src/vdj_utils/annotation_utils/aa_annotation/aa_calculator.cpp
+++ b/src/vdj_utils/annotation_utils/aa_annotation/aa_calculator.cpp
@@ -3,14 +3,14 @@
#include "aa_calculator.hpp"
namespace annotation_utils {
- bool SimpleAACalculator::ComputeInFrame(const CDRLabeling &cdr_labeling) {
+ bool SimpleAACalculator::ComputeInFrame(const CDRLabeling &cdr_labeling) const {
CDRRange end_region = (cdr_labeling.cdr3.Valid()) ? cdr_labeling.cdr3 : cdr_labeling.cdr2;
VERIFY_MSG(end_region.Valid() and cdr_labeling.cdr1.Valid(),
"CDRs regions are not defined, ORF cannot be identified");
return (end_region.end_pos - cdr_labeling.cdr1.start_pos + 1) % 3 == 0;
}
- bool SimpleAACalculator::FindStopCodon(const AAString &aa_str) {
+ bool SimpleAACalculator::FindStopCodon(const AAString &aa_str) const {
bool has_stop_codon = false;
for(size_t i = 0; i < seqan::length(aa_str); i++)
if(aa_str[i] == '*') {
@@ -21,7 +21,7 @@ namespace annotation_utils {
}
AminoAcidAnnotation SimpleAACalculator::ComputeAminoAcidAnnotation(const core::Read &read,
- const CDRLabeling &cdr_labeling) {
+ const CDRLabeling &cdr_labeling) const {
VERIFY_MSG(cdr_labeling.cdr1.Valid(), "CDR1 is not defined, AA sequence cannot be computed");
using namespace seqan;
StringSet, Owner > > aa_seqs;
diff --git a/src/vdj_utils/annotation_utils/aa_annotation/aa_calculator.hpp b/src/vdj_utils/annotation_utils/aa_annotation/aa_calculator.hpp
index 0b5d2ba0..6f0f6bd9 100644
--- a/src/vdj_utils/annotation_utils/aa_annotation/aa_calculator.hpp
+++ b/src/vdj_utils/annotation_utils/aa_annotation/aa_calculator.hpp
@@ -8,19 +8,21 @@ namespace annotation_utils {
class BaseAACalculator {
public:
virtual AminoAcidAnnotation ComputeAminoAcidAnnotation(const core::Read& read,
- const CDRLabeling &cdr_labeling) = 0;
+ const CDRLabeling &cdr_labeling) const = 0;
virtual ~BaseAACalculator() { }
};
class SimpleAACalculator : public BaseAACalculator {
private:
- bool ComputeInFrame(const CDRLabeling &cdr_labeling);
+ bool ComputeInFrame(const CDRLabeling &cdr_labeling) const;
- bool FindStopCodon(const AAString &aa_str);
+ bool FindStopCodon(const AAString &aa_str) const;
public:
AminoAcidAnnotation ComputeAminoAcidAnnotation(const core::Read& read,
- const CDRLabeling &cdr_labeling);
+ const CDRLabeling &cdr_labeling) const override;
};
+
+ using BaseAACalculatorPtr = std::unique_ptr<BaseAACalculator>;  // owning pointer to a calculator implementation
}
\ No newline at end of file
diff --git a/src/vdj_utils/germline_utils/germline_config.cpp b/src/vdj_utils/germline_utils/germline_config.cpp
new file mode 100644
index 00000000..5b0404b5
--- /dev/null
+++ b/src/vdj_utils/germline_utils/germline_config.cpp
@@ -0,0 +1,28 @@
+//
+// Created by Andrew Bzikadze on 3/16/17.
+//
+
+#include "germline_config.hpp"
+#include
+#include
+#include
+
+namespace germline_utils {
+
+// Fills every GermlineInput field from the property-tree key of the same name; the bool argument is unused.
+void load(GermlineInput &gi, boost::property_tree::ptree const &pt, bool) {
+    config_common::load(gi.germline_filenames_config, pt, "germline_filenames_config");
+    config_common::load(gi.ig_dir, pt, "ig_dir");
+    config_common::load(gi.tcr_dir, pt, "tcr_dir");
+}
+
+// Fills every GermlineParams field from the property-tree key of the same name; the bool argument is unused.
+void load(GermlineParams &gp, boost::property_tree::ptree const &pt, bool) {
+    config_common::load(gp.germline_dir, pt, "germline_dir");
+    config_common::load(gp.loci, pt, "loci");
+    config_common::load(gp.organism, pt, "organism");
+    config_common::load(gp.pseudogenes, pt, "pseudogenes");
+}
+
+
+} // End namespace germline_utils
diff --git a/src/vdj_utils/germline_utils/germline_config.hpp b/src/vdj_utils/germline_utils/germline_config.hpp
new file mode 100644
index 00000000..c17c14ab
--- /dev/null
+++ b/src/vdj_utils/germline_utils/germline_config.hpp
@@ -0,0 +1,29 @@
+//
+// Created by Andrew Bzikadze on 3/16/17.
+//
+
+#pragma once
+#include "io/library.hpp"
+#include "config_singl.hpp"
+#include
+
+namespace germline_utils {
+
+struct GermlineInput {  // germline input settings; load() fills each field from the config key of the same name
+ std::string ig_dir;  // config key "ig_dir" (sample config value: IG_antevolo)
+ std::string tcr_dir;  // config key "tcr_dir" (sample config value: TCR)
+ std::string germline_filenames_config;  // config key "germline_filenames_config" (a file path in the sample config)
+};
+
+struct GermlineParams {  // germline selection settings; load() fills each field from the config key of the same name
+ std::string germline_dir;  // config key "germline_dir" (sample config value: ./data/germline)
+ std::string organism;  // config key "organism" (sample config value: human)
+ std::string loci;  // config key "loci" (sample config value: IGH)
+ bool pseudogenes;  // config key "pseudogenes"; has no initializer, so it is indeterminate until load() assigns it
+};
+
+
+void load(GermlineInput &gi, boost::property_tree::ptree const &pt, bool);  // reads ig_dir, tcr_dir and germline_filenames_config from pt
+void load(GermlineParams &gp, boost::property_tree::ptree const &pt, bool);  // reads germline_dir, loci, organism and pseudogenes from pt
+
+} // End namespace germline_utils
diff --git a/src/vj_finder/germline_db_generator.cpp b/src/vdj_utils/germline_utils/germline_db_generator.cpp
similarity index 66%
rename from src/vj_finder/germline_db_generator.cpp
rename to src/vdj_utils/germline_utils/germline_db_generator.cpp
index 545ea5cb..195cf1ed 100644
--- a/src/vj_finder/germline_db_generator.cpp
+++ b/src/vdj_utils/germline_utils/germline_db_generator.cpp
@@ -2,72 +2,67 @@
#include "germline_db_generator.hpp"
-#include
-
-namespace vj_finder {
- class LociParam {
- public:
- static bool LociIncludeIg(std::string loci) {
- if(loci.size() < 2)
- return false;
- return loci == "all" or loci.substr(0, 2) == "IG";
- }
+namespace germline_utils {
+ // True when loci is "all" or begins with "IG"; strings shorter than two characters never match.
+ bool LociParam::LociIncludeIg(std::string loci) {
+     const bool long_enough = loci.size() >= 2;
+     return long_enough and (loci.compare(0, 2, "IG") == 0 or loci == "all");
+ }
- static bool LociIncludeTr(std::string loci) {
- if(loci.size() < 2)
- return false;
- return loci == "all" or loci.substr(0, 2) == "TR";
- }
+ // True when loci is "all" or begins with "TR"; strings shorter than two characters never match.
+ bool LociParam::LociIncludeTr(std::string loci) {
+     const bool long_enough = loci.size() >= 2;
+     return long_enough and (loci.compare(0, 2, "TR") == 0 or loci == "all");
+ }
- static bool LociIncludeIgh(std::string loci) {
- return loci == "all" or loci == "IGH" or loci == "IG";
- }
+ bool LociParam::LociIncludeIgh(std::string loci) {  // true for "IGH", "IG" or "all"
+     return loci == "IGH" or loci == "IG" or loci == "all";
+ }
- static bool LociIncludeIgk(std::string loci) {
- return loci == "all" or loci == "IGK" or loci == "IG";
- }
+ bool LociParam::LociIncludeIgk(std::string loci) {  // true for "IGK", "IG" or "all"
+     return loci == "IGK" or loci == "IG" or loci == "all";
+ }
- static bool LociIncludeIgl(std::string loci) {
- return loci == "all" or loci == "IGL" or loci == "IG";
- }
+ bool LociParam::LociIncludeIgl(std::string loci) {  // true for "IGL", "IG" or "all"
+     return loci == "IGL" or loci == "IG" or loci == "all";
+ }
- static bool LociIncludeTra(std::string loci) {
- return loci == "all" or loci == "TRA" or loci == "TR";
- }
+ bool LociParam::LociIncludeTra(std::string loci) {  // true for "TRA", "TR" or "all"
+     return loci == "TRA" or loci == "TR" or loci == "all";
+ }
- static bool LociIncludeTrb(std::string loci) {
- return loci == "all" or loci == "TRB" or loci == "TR";
- }
+ bool LociParam::LociIncludeTrb(std::string loci) {  // true for "TRB", "TR" or "all"
+     return loci == "TRB" or loci == "TR" or loci == "all";
+ }
- static bool LociIncludeTrg(std::string loci) {
- return loci == "all" or loci == "TRG" or loci == "TR";
- }
+ bool LociParam::LociIncludeTrg(std::string loci) {  // true for "TRG", "TR" or "all"
+     return loci == "TRG" or loci == "TR" or loci == "all";
+ }
- static bool LociIncludeTrd(std::string loci) {
- return loci == "all" or loci == "TRD" or loci == "TR";
- }
+ bool LociParam::LociIncludeTrd(std::string loci) {  // true for "TRD", "TR" or "all"
+     return loci == "TRD" or loci == "TR" or loci == "all";
+ }
- static std::vector ConvertIntoChainTypes(std::string loci) {
- std::vector chain_types;
- if(loci.size() < 2)
- return chain_types;
- if(LociIncludeIgh(loci))
- chain_types.push_back(germline_utils::ChainType("IGH"));
- if(LociIncludeIgk(loci))
- chain_types.push_back(germline_utils::ChainType("IGK"));
- if(LociIncludeIgl(loci))
- chain_types.push_back(germline_utils::ChainType("IGL"));
- if(LociIncludeTra(loci))
- chain_types.push_back(germline_utils::ChainType("TRA"));
- if(LociIncludeTrb(loci))
- chain_types.push_back(germline_utils::ChainType("TRB"));
- if(LociIncludeTrg(loci))
- chain_types.push_back(germline_utils::ChainType("TRG"));
- if(LociIncludeTrd(loci))
- chain_types.push_back(germline_utils::ChainType("TRD"));
+ std::vector<germline_utils::ChainType> LociParam::ConvertIntoChainTypes(std::string loci) {  // expands a loci string into the chain types it covers
+     std::vector<germline_utils::ChainType> chain_types;  // filled in the fixed order IGH, IGK, IGL, TRA, TRB, TRG, TRD
+     if (loci.size() < 2)  // strings shorter than two characters yield an empty list
return chain_types;
- }
- };
+     if (LociIncludeIgh(loci))
+         chain_types.push_back(germline_utils::ChainType("IGH"));
+     if (LociIncludeIgk(loci))
+         chain_types.push_back(germline_utils::ChainType("IGK"));
+     if (LociIncludeIgl(loci))
+         chain_types.push_back(germline_utils::ChainType("IGL"));
+     if (LociIncludeTra(loci))
+         chain_types.push_back(germline_utils::ChainType("TRA"));
+     if (LociIncludeTrb(loci))
+         chain_types.push_back(germline_utils::ChainType("TRB"));
+     if (LociIncludeTrg(loci))
+         chain_types.push_back(germline_utils::ChainType("TRG"));
+     if (LociIncludeTrd(loci))
+         chain_types.push_back(germline_utils::ChainType("TRD"));
+     return chain_types;
+ }
class GermlineFilesConfig {
struct ExtendedImmuneGeneType {
@@ -139,10 +134,10 @@ namespace vj_finder {
};
class ChainDirectoryParam {
- const VJFinderConfig::IOParams::InputParams::GermlineInput &gi_;
+ const germline_utils::GermlineInput &gi_;
public:
- ChainDirectoryParam(const VJFinderConfig::IOParams::InputParams::GermlineInput &gi) :
+ ChainDirectoryParam(const germline_utils::GermlineInput &gi) :
gi_(gi) { }
std::string GetDirByChainType(germline_utils::ChainType chain_type) {
@@ -168,6 +163,11 @@ namespace vj_finder {
germline_files_config.GetFilenameByImmuneGeneType(
ImmuneGeneType(*it, SegmentType::VariableSegment),
germ_params_.pseudogenes)));
+ if (it->IsVDJ())
+ d_genes_fnames_.push_back(path::append_path(lymph_dir,
+ germline_files_config.GetFilenameByImmuneGeneType(
+ ImmuneGeneType(*it, SegmentType::DiversitySegment),
+ germ_params_.pseudogenes)));
j_genes_fnames_.push_back(path::append_path(lymph_dir,
germline_files_config.GetFilenameByImmuneGeneType(
ImmuneGeneType(*it, SegmentType::JoinSegment),
@@ -176,6 +176,11 @@ namespace vj_finder {
INFO(v_genes_fnames_.size() << " V gene segment files will be used for DB: ");
for(size_t i = 0; i < v_genes_fnames_.size(); i++)
INFO(chain_types_[i] << ": " << v_genes_fnames_[i]);
+
+ INFO(d_genes_fnames_.size() << " D gene segment files will be used for DB: ");
+ for(size_t i = 0, d = 0; i < chain_types_.size() and d < d_genes_fnames_.size(); i++)  // D files are stored only for IsVDJ() chains, so they carry their own index d
+     if(chain_types_[i].IsVDJ()) { INFO(chain_types_[i] << ": " << d_genes_fnames_[d]); d++; }
+
INFO(j_genes_fnames_.size() << " J gene segment files will be used for DB: ");
for(size_t i = 0; i < j_genes_fnames_.size(); i++)
INFO(chain_types_[i] << ": " << j_genes_fnames_[i]);
@@ -190,6 +195,15 @@ namespace vj_finder {
return v_custom_db;
}
+ germline_utils::CustomGeneDatabase GermlineDbGenerator::GenerateDiversityDb() {
+     // Builds the D-segment database. D files are stored only for IsVDJ() chain types, so d_genes_fnames_ is walked with its own index d and each file is paired with the chain it was collected for.
+     germline_utils::CustomGeneDatabase d_custom_db(germline_utils::SegmentType::DiversitySegment);
+     for(size_t i = 0, d = 0; i < chain_types_.size() and d < d_genes_fnames_.size(); i++)
+         if(chain_types_[i].IsVDJ())
+             d_custom_db.AddDatabase(germline_utils::ImmuneGeneType(chain_types_[i], germline_utils::SegmentType::DiversitySegment), d_genes_fnames_[d++]);
+     return d_custom_db;
+ }
+
germline_utils::CustomGeneDatabase GermlineDbGenerator::GenerateJoinDb() {
germline_utils::CustomGeneDatabase j_custom_db(germline_utils::SegmentType::JoinSegment);
for(size_t i = 0; i < j_genes_fnames_.size(); i++)
diff --git a/src/vdj_utils/germline_utils/germline_db_generator.hpp b/src/vdj_utils/germline_utils/germline_db_generator.hpp
new file mode 100644
index 00000000..8f4a42df
--- /dev/null
+++ b/src/vdj_utils/germline_utils/germline_db_generator.hpp
@@ -0,0 +1,47 @@
+#pragma once
+
+#include "germline_utils/germline_config.hpp"
+#include