diff --git a/CMakeLists.txt b/CMakeLists.txt index 5aa9b0ca..a2ea67ef 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,7 +61,7 @@ install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/py" PATTERN "*.pyc" EXCLUDE PATTERN ".gitignore" EXCLUDE) -set(tools "igrec.py;mass_spectra_analyzer.py;dense_subgraph_finder.py;igquast.py;barcoded_igrec.py;diversity_analyzer.py") +set(tools "igrec.py;mass_spectra_analyzer.py;dense_subgraph_finder.py;igquast.py;barcoded_igrec.py;diversity_analyzer.py;ig_simulator.py") foreach(tool ${tools}) install(PROGRAMS ${tool} DESTINATION "${INSTALL_DIR}" diff --git a/Makefile b/Makefile index 8071db42..03ace38c 100644 --- a/Makefile +++ b/Makefile @@ -48,6 +48,9 @@ cdr: cmake umi: cmake $(MAKE) -C build/release umi_correction_stats umi_graph umi_naive umi_to_fastq +igs: cmake + $(MAKE) -C build/release ig_simulator + clean: -rm -r build @@ -55,4 +58,5 @@ clean_tests: -rm *.pyc -rm -r igrec_test -rm -r ms_analyzer_test + -rm -r ig_simulator_test -rm *~ diff --git a/configs/ig_simulator/config.info b/configs/ig_simulator/config.info new file mode 100644 index 00000000..0a870023 --- /dev/null +++ b/configs/ig_simulator/config.info @@ -0,0 +1,107 @@ +io_params { + input_params { + germline_input { + ig_dir IG_antevolo + tcr_dir TCR + germline_filenames_config ./configs/vj_finder/germline_files_config.txt + } + cdr_labeler_config_filename ./configs/cdr_labeler/config.info + } + + output_params { + output_dir ig_simulator_test + log_filename log.properties + base_repertoire_filename base_repertoire.fasta + base_repertoire_info base_repertoire.info + filtered_pool filtered_pool.fasta + full_pool full_pool.fasta + trees_dir trees_dir + } +} + +germline_params { + organism human + loci IGH + pseudogenes false + germline_dir ./data/germline +} + +simulation_params { + base_repertoire_params { + metaroot_simulation_params { + gene_chooser_params { + gene_chooser_method uniform + } + + nucleotides_remover_params { + nucleotides_remover_method uniform + 
uniform_remover_params { + max_remove_v_gene 20 + max_remove_d_gene_left 5 + max_remove_d_gene_right 5 + max_remove_j_gene 10 + } + } + + p_nucleotides_creator_params { + p_nucleotides_creator_method uniform + uniform_creator_params { + max_create_v_gene 5 + max_create_d_gene_left 3 + max_create_d_gene_right 3 + max_create_j_gene 3 + } + } + + n_nucleotides_inserter_params { + n_nucleotides_method uniform + uniform_inserter_params { + max_vj_insertion 10 + max_vd_insertion 21 + max_dj_insertion 23 + } + } + + cleavage_params { + prob_cleavage_v 0.5 + prob_cleavage_d_left 0.5 + prob_cleavage_d_right 0.5 + prob_cleavage_j 0.5 + } + } + + multiplicity_creator_params { + multiplicity_method geometric + geometric_params { + lambda 0.1 + } + } + + productive_params { + productive_part 1 + } + + number_of_metaroots 10 + } + + clonal_tree_simulator_params { + tree_size_generator_params { + tree_size_generator_method geometric + geometric_params { + lambda 0.001 + } + } + + shm_creator_params { + shm_creator_method poisson + poisson_params { + lambda 2 + } + } + + pool_manager_strategy wide; uniform, wide, deep + prob_ret_to_pool 0.9 + lambda_distr_n_children 0.3 + + } +} diff --git a/configs/ig_simulator/config.info.template b/configs/ig_simulator/config.info.template new file mode 100644 index 00000000..0a870023 --- /dev/null +++ b/configs/ig_simulator/config.info.template @@ -0,0 +1,107 @@ +io_params { + input_params { + germline_input { + ig_dir IG_antevolo + tcr_dir TCR + germline_filenames_config ./configs/vj_finder/germline_files_config.txt + } + cdr_labeler_config_filename ./configs/cdr_labeler/config.info + } + + output_params { + output_dir ig_simulator_test + log_filename log.properties + base_repertoire_filename base_repertoire.fasta + base_repertoire_info base_repertoire.info + filtered_pool filtered_pool.fasta + full_pool full_pool.fasta + trees_dir trees_dir + } +} + +germline_params { + organism human + loci IGH + pseudogenes false + germline_dir 
./data/germline +} + +simulation_params { + base_repertoire_params { + metaroot_simulation_params { + gene_chooser_params { + gene_chooser_method uniform + } + + nucleotides_remover_params { + nucleotides_remover_method uniform + uniform_remover_params { + max_remove_v_gene 20 + max_remove_d_gene_left 5 + max_remove_d_gene_right 5 + max_remove_j_gene 10 + } + } + + p_nucleotides_creator_params { + p_nucleotides_creator_method uniform + uniform_creator_params { + max_create_v_gene 5 + max_create_d_gene_left 3 + max_create_d_gene_right 3 + max_create_j_gene 3 + } + } + + n_nucleotides_inserter_params { + n_nucleotides_method uniform + uniform_inserter_params { + max_vj_insertion 10 + max_vd_insertion 21 + max_dj_insertion 23 + } + } + + cleavage_params { + prob_cleavage_v 0.5 + prob_cleavage_d_left 0.5 + prob_cleavage_d_right 0.5 + prob_cleavage_j 0.5 + } + } + + multiplicity_creator_params { + multiplicity_method geometric + geometric_params { + lambda 0.1 + } + } + + productive_params { + productive_part 1 + } + + number_of_metaroots 10 + } + + clonal_tree_simulator_params { + tree_size_generator_params { + tree_size_generator_method geometric + geometric_params { + lambda 0.001 + } + } + + shm_creator_params { + shm_creator_method poisson + poisson_params { + lambda 2 + } + } + + pool_manager_strategy wide; uniform, wide, deep + prob_ret_to_pool 0.9 + lambda_distr_n_children 0.3 + + } +} diff --git a/data/germline/human/IG_antevolo/IGHD.fa b/data/germline/human/IG_antevolo/IGHD.fa new file mode 100644 index 00000000..42135f89 --- /dev/null +++ b/data/germline/human/IG_antevolo/IGHD.fa @@ -0,0 +1,54 @@ +>IGHD1-1*01 +ggtacaactggaacgac +>IGHD1-14*01 +ggtataaccggaaccac +>IGHD1-20*01 +ggtataactggaacgac +>IGHD1-26*01 +ggtatagtgggagctactac +>IGHD1-7*01 +ggtataactggaactac +>IGHD2-15*01 +aggatattgtagtggtggtagctgctactcc +>IGHD2-2*01 +aggatattgtagtagtaccagctgctatgcc +>IGHD2-21*01 +agcatattgtggtggtgattgctattcc +>IGHD2-8*01 +aggatattgtactaatggtgtatgctatacc +>IGHD3-10*01 
+gtattactatggttcggggagttattataac +>IGHD3-16*01 +gtattatgattacgtttgggggagttatgcttatacc +>IGHD3-22*01 +gtattactatgatagtagtggttattactac +>IGHD3-3*01 +gtattacgatttttggagtggttattatacc +>IGHD3-9*01 +gtattacgatattttgactggttattataac +>IGHD4-11*01 +tgactacagtaactac +>IGHD4-17*01 +tgactacggtgactac +>IGHD4-23*01 +tgactacggtggtaactcc +>IGHD4-4*01 +tgactacagtaactac +>IGHD5-12*01 +gtggatatagtggctacgattac +>IGHD5-18*01 +gtggatacagctatggttac +>IGHD5-24*01 +gtagagatggctacaattac +>IGHD5-5*01 +gtggatacagctatggttac +>IGHD6-13*01 +gggtatagcagcagctggtac +>IGHD6-19*01 +gggtatagcagtggctggtac +>IGHD6-25*01 +gggtatagcagcggctac +>IGHD6-6*01 +gagtatagcagctcgtcc +>IGHD7-27*01 +ctaactgggga diff --git a/data/germline/human/IG_antevolo/IGHJ.fa b/data/germline/human/IG_antevolo/IGHJ.fa new file mode 100644 index 00000000..26c1ce32 --- /dev/null +++ b/data/germline/human/IG_antevolo/IGHJ.fa @@ -0,0 +1,12 @@ +>IGHJ1*01 +gctgaatacttccagcactggggccagggcaccctggtcaccgtctcctcag +>IGHJ2*01 +ctactggtacttcgatctctggggccgtggcaccctggtcactgtctcctcag +>IGHJ3*01 +tgatgcttttgatgtctggggccaagggacaatggtcaccgtctcttcag +>IGHJ4*01 +actactttgactactggggccaaggaaccctggtcaccgtctcctcag +>IGHJ5*01 +acaactggttcgactcctggggccaaggaaccctggtcaccgtctcctcag +>IGHJ6*01 +attactactactactacggtatggacgtctggggcaaagggaccacggtcaccgtctcctcag diff --git a/data/germline/human/IG_antevolo/IGHV.fa b/data/germline/human/IG_antevolo/IGHV.fa new file mode 100644 index 00000000..29732486 --- /dev/null +++ b/data/germline/human/IG_antevolo/IGHV.fa @@ -0,0 +1,170 @@ +>IGHV1-18*01 +caggttcagctggtgcagtctggagctgaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacacctttaccagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttacaatggtaacacaaactatgcacagaagctccagggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga +>IGHV1-2*01 
+caggtgcagctggtgcagtctggggctgaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttcaccggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaacagtggtggcacaaactatgcacagaagtttcagggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga +>IGHV1-24*01 +caggtccagctggtacagtctggggctgaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggtttccggatacaccctcactgaattatccatgcactgggtgcgacaggctcctggaaaagggcttgagtggatgggaggttttgatcctgaagatggtgaaacaatctacgcacagaagttccagggcagagtcaccatgaccgaggacacatctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga +>IGHV1-3*01 +caggtccagcttgtgcagtctggggctgaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttcactagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggatcaacgctggcaatggtaacacaaaatattcacagaagttccagggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaagacacggctgtgtattactgtgcgagaga +>IGHV1-38-4*01 +caggtccagctggtgcagtcttgggctgaggtgaggaagtctggggcctcagtgaaagtctcctgtagtttttctgggtttaccatcaccagctacggtatacattgggtgcaacagtcccctggacaagggcttgagtggatgggatggatcaaccctggcaatggtagcccaagctatgccaagaagtttcagggcagattcaccatgaccagggacatgtccacaaccacagcctacacagacctgagcagcctgacatctgaggacatggctgtgtattactatgcaagaca +>IGHV1-45*01 +cagatgcagctggtgcagtctggggctgaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttcacctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttcaatggtaacaccaactacgcacagaaattccaggacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagata +>IGHV1-46*01 +caggtgcagctggtgcagtctggggctgaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttcaccagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagtggtggtagcacaagctacgcacagaagttccagggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-58*01 
+caaatgcagctggtgcagtctgggcctgaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcacctttactagctctgctgtgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggcagtggtaacacaaactacgcacagaagttccaggaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga +>IGHV1-68*01 +caggtgcagctggggcagtctgaggctgaggtaaagaagcctggggcctcagtgaaggtctcctgcaaggcttccggatacaccttcacttgctgctccttgcactggttgcaacaggcccctggacaagggcttgaaaggatgagatggatcacactttacaatggtaacaccaactatgcaaagaagttccagggcagagtcaccattaccagggacatgtccctgaggacagcctacatagagctgagcagcctgagatctgaggactcggctgtgtattactgggcaagata +>IGHV1-69*01 +caggtccagctggtgcagtctggggctgaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttcagcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatctttggtacagcaaactacgcacagaagttccagggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-69-2*01 +gaggtccagctggtacagtctggggctgaggtgaagaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttcaccgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaagatggtgaaacaatatacgcagagaagttccagggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga +>IGHV1-69D*01 +caggtgcagctggtgcagtctggggctgaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttcagcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatctttggtacagcaaactacgcacagaagttccagggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-8*01 +caggtgcagctggtgcagtctggggctgaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttcaccagttatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaacagtggtaacacaggctatgcacagaagttccagggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg +>IGHV1-NL1*01 
+caggttcagctgttgcagcctggggtccaggtgaagaagcctgggtcctcagtgaaggtctcctgctaggcttccagatacaccttcaccaaatactttacacggtgggtgtgacaaagccctggacaagggcatnagtggatgggatgaatcaacccttacaacgataacacacactacgcacagacgttctggggcagagtcaccattaccagtgacaggtccatgagcacagcctacatggagctgagcngcctgagatccgaagacatggtcgtgtattactgtgtgagaga +>IGHV2-10*01 +caggtcaccttgaaggagtctggtcctgcactggtgaaacccacacagaccctcatgctgacctgcaccttctctgggttctcactcagcacttctggaatgggtgtgggttagatctgtcagccctcagcaaaggccctggagtggcttgcacacatttattagaatgataataaatactacagcccatctctgaagagtaggctcattatctccaaggacacctccaagaatgaagtggttctaacagtgatcaacatggacattgtggacacagccacacattactgtgcaaggagac +>IGHV2-26*01 +caggtcaccttgaaggagtctggtcctgtgctggtgaaacccacagagaccctcacgctgacctgcaccgtctctgggttctcactcagcaatgctagaatgggtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacacattttttcgaatgacgaaaaatcctacagcacatctctgaagagcaggctcaccatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacatattactgtgcacggatac +>IGHV2-5*01 +cagatcaccttgaaggagtctggtcctacgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagcactagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggatgatgataagcgctacagcccatctctgaagagcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac +>IGHV2-70*01 +caggtcaccttgaaggagtctggtcctgcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagcactagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggatgatgataaatactacagcacatctctgaagaccaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac +>IGHV2-70D*01 +caggtcaccttgaaggagtctggtcctgcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagcactagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggatgatgataaattctacagcacatctctgaagaccaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac +>IGHV3-11*01 
+caggtgcagctggtggagtctgggggaggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttcagtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagtagtagttacacaaactacgcagactctgtgaagggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga +>IGHV3-13*01 +gaggtgcagctggtggagtctgggggaggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttcagtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgctggtgacacatactatccaggctccgtgaagggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga +>IGHV3-15*01 +gaggtgcagctggtggagtctgggggaggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttcagtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaaggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga +>IGHV3-16*01 +gaggtacaactggtggagtctgggggaggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttcagtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaatggcagtaggacgcactatgtggactccgtgaagcgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa +>IGHV3-19*01 +acagtgcagctggtggagtctgggggaggcttggtagagcctggggggtccctgagactctcctgtgcagcctctggattcaccttcagtaacagtgacatgaactgggtccgccaggctccaggaaaggggctggagtgggtatcgggtgttagttggaatggcagtaggacgcactatgcagactctgtgaagggccgattcatcatctccagagacaattccaggaacttcctgtatcagcaaatgaacagcctgaggcccgaggacatggctgtgtattactgtgtgagaaa +>IGHV3-20*01 +gaggtgcagctggtggagtctgggggaggtgtggtacggcctggggggtccctgagactctcctgtgcagcctctggattcacctttgatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaatggtggtagcacaggttatgcagactctgtgaagggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga +>IGHV3-21*01 
+gaggtgcagctggtggagtctgggggaggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttcagtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagtagtagttacatatactacgcagactcagtgaagggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-22*01 +gaggtgcatctggtggagtctgggggagccttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttcagttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaaggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga +>IGHV3-23*01 +gaggtgcagctgttggagtctgggggaggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcacctttagcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagtggtggtagcacatactacgcagactccgtgaagggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga +>IGHV3-23D*01 +gaggtgcagctgttggagtctgggggaggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcacctttagcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagtggtggtagcacatactacgcagactccgtgaagggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga +>IGHV3-25*01 +gagatgcagctggtggagtctgggggaggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttcagtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaatgggggtagcacatacctcatagactccggtaaggaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga +>IGHV3-29*01 +gaggtggagctgatagagcccacagaggacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttcagtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgatggaagtcagatacaccatgcagactctgtgaagggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt +>IGHV3-30*01 
+caggtgcagctggtggagtctgggggaggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttcagtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgatggaagtaataaatactacgcagactccgtgaagggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30-2*01 +gaggtacagctcgtggagtccggagaggacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttcagtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgatggaagtcagatatgttatgcataatctttgaagagcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgaggca +>IGHV3-30-22*01 +gaggtggagctgatagagtccatagaggacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttcagtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgatggaagtcagatacaccatgcagactctgtgaagggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagagctgaggacatggacgtgtatggctgtacataaggtc +>IGHV3-30-3*01 +caggtgcagctggtggagtctgggggaggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttcagtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgatggaagcaataaatactacgcagactccgtgaagggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30-33*01 +gaggtacagctcgtggagtccggagaggacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttcagtagctactgaaggagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgatggaagtcagatatgttatgcataatctttgaagagcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgagg +>IGHV3-30-42*01 +gaggtggagctgatagagcccacagaggacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttcagtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgatggaagtcagatacaccatgcagactctgtgaagggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt +>IGHV3-30-5*01 
+caggtgcagctggtggagtctgggggaggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttcagtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgatggaagtaataaatactatgcagactccgtgaagggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga +>IGHV3-30-52*01 +gaggtacagctcgtggagtccggagaggacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttcagtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgatggaagtcagatatgttatgcataatctttgaagagcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgagg +>IGHV3-32*01 +gaggtggagctgatagagtccatagaggacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttcagtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgatggaagtcagatacaccatgcagactctgtgaagggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacactcagagagctgaggacgtggccgtgtatggctatacataaggtc +>IGHV3-33*01 +caggtgcagctggtggagtctgggggaggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttcagtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgatggaagtaataaatactatgcagactccgtgaagggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-33-2*01 +gaggtacagctcgtggagtccggagaggacccaagacaacctgggggatccttgagactctcctgtgcagactctggattaaccttcagtagctactgaatgagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgatggaagtcagatatgttatgcccaatctgtgaagagcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgcaaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgaggca +>IGHV3-35*01 +gaggtgcagctggtggagtctgggggaggcttggtacagcctgggggatccctgagactctcctgtgcagcctctggattcaccttcagtaacagtgacatgaactgggtccatcaggctccaggaaaggggctggagtgggtatcgggtgttagttggaatggcagtaggacgcactatgcagactctgtgaagggccgattcatcatctccagagacaattccaggaacaccctgtatctgcaaacgaatagcctgagggccgaggacacggctgtgtattactgtgtgagaaa +>IGHV3-38*01 
+gaggtgcagctggtggagtctgggggaggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtcagtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggtggtagcacatactacgcagactccaggaagggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata +>IGHV3-38-3*01 +gaggtgcagctggtggagtctcggggagtcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtcagtagcaatgagatgagctgggtccgccaggctccagggaagggtctggagtgggtctcatccattagtggtggtagcacatactacgcagactccaggaagggcagattcaccatctccagagacaattccaagaacacgctgcatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtaagaaaga +>IGHV3-43*01 +gaagtgcagctggtggagtctgggggagtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcacctttgatgattataccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggatggtggtagcacatactatgcagactctgtgaagggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata +>IGHV3-43D*01 +gaagtgcagctggtggagtctgggggagtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcacctttgatgattatgccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggatggtggtagcacctactatgcagactctgtgaagggtcgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagagctgaggacaccgccttgtattactgtgcaaaagata +>IGHV3-47*01 +gaggatcagctggtggagtctgggggaggcttggtacagcctggggggtccctgcgaccctcctgtgcagcctctggattcgccttcagtagctatgctctgcactgggttcgccgggctccagggaagggtctggagtgggtatcagctattggtactggtggtgatacatactatgcagactccgtgatgggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcatatgaacagcctgatagctgaggacatggctgtgtattattgtgcaagaga +>IGHV3-48*01 +gaggtgcagctggtggagtctgggggaggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttcagtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagtagtagtaccatatactacgcagactctgtgaagggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-49*01 
+gaggtgcagctggtggagtctgggggaggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcacctttggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaaggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga +>IGHV3-52*01 +gaggtgcagctggtggagtctgggtgaggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttcagtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgacggaagtgagaaatactatgtagactctgtgaagggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgagagg +>IGHV3-53*01 +gaggtgcagctggtggagtctggaggaggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtcagtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggtggtagcacatactacgcagactccgtgaagggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga +>IGHV3-54*01 +gaggtacagctggtggagtctgaagaaaaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttcagtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtaggatagaagtcagctatgttatgcacaatctgtgaagagcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt +>IGHV3-62*01 +gaggtgcagctggtggagtctggggaaggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttcagtagctctgctatgcactgggtccgccaggctccaagaaagggtttgtagtgggtctcagttattagtacaagtggtgataccgtactctacacagactctgtgaagggccgattcaccatctccagagacaatgcccagaattcactgtctctgcaaatgaacagcctgagagccgagggcacagttgtgtactactgtgtgaaaga +>IGHV3-63*01 +gaggtggagctgatagagtccatagagggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttcagtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgatggaagtcagatataccatgcagactctgtgaagggcagattcaccatctccaaagacaatgctaagaactcaccgtatctccaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataaggtt +>IGHV3-64*01 
+gaggtgcagctggtggagtctgggggaggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttcagtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaatgggggtagcacatactacgcagactcagtgaagggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgagcagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-64D*01 +gaggtgcagctggtggagtctgggggaggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttcagtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaatgggggtagcacatactacgcagactccgtgaagggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga +>IGHV3-66*01 +gaggtgcagctggtggagtctgggggaggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtcagtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggtggtagcacatactacgcagactccgtgaagggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-69-1*01 +gaggtgcagctggtggagtctgggggaggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttcagtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagtagtaccatatactacgcagactctgtgaagggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-7*01 +gaggtgcagctggtggagtctgggggaggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcacctttagtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagatggaagtgagaaatactatgtggactctgtgaagggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-71*01 +gaggtgcagctggtggagtctgggggaggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttcagtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaaggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-72*01 
+gaggtgcagctggtggagtctgggggaggcttggtccagcctggagggtccctgagactctcctgtgcagcctctggattcaccttcagtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagttacaccacagaatacgccgcgtctgtgaaaggcagattcaccatctcaagagatgattcaaagaactcactgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtgctagaga +>IGHV3-73*01 +gaggtgcagctggtggagtctgggggaggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttcagtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaaggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca +>IGHV3-74*01 +gaggtgcagctggtggagtccgggggaggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttcagtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgatgggagtagcacaagctacgcggactccgtgaagggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga +>IGHV3-9*01 +gaagtgcagctggtggagtctgggggaggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcacctttgatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaatagtggtagcataggctatgcggactctgtgaagggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata +>IGHV3-NL1*01 +caggtgcagctggtggagtctgggggaggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttcagtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtctcagttatttatagcggtggtagtagcacatactatgcagactccgtgaagggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga +>IGHV4-28*01 +caggtgcagctgcaggagtcgggcccaggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagcagtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagtgggagcacctactacaacccgtccctcaagagtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa +>IGHV4-30-2*01 
+cagctgcagctgcaggagtccggctcaggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagcagtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagtgggagcacctactacaacccgtccctcaagagtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga +>IGHV4-30-4*01 +caggtgcagctgcaggagtcgggcccaggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagcagtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagtgggagcacctactacaacccgtccctcaagagtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga +>IGHV4-31*01 +caggtgcagctgcaggagtcgggcccaggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagcagtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagtgggagcacctactacaacccgtccctcaagagtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4-34*01 +caggtgcagctacagcagtggggcgcaggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttcagtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagtggaagcaccaactacaacccgtccctcaagagtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg +>IGHV4-38-2*01 +caggtgcagctgcaggagtcgggcccaggactggtgaagccttcggagaccctgtccctcacctgcgctgtctctggttactccatcagcagtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagtgggagcacctactacaacccgtccctcaagagtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga +>IGHV4-39*01 +cagctgcagctgcaggagtcgggcccaggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagcagtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagtgggagcacctactacaacccgtccctcaagagtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaga +>IGHV4-4*01 
+caggtgcagctgcaggagtcgggcccaggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagcagtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagtgggagcaccaactacaacccgtccctcaagagtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4-55*01 +caggtgcagctgcaggagtcgggcccaggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagcagtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagtgggagcacctactacaacccgtccctcaagagtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata +>IGHV4-59*01 +caggtgcagctgcaggagtcgggcccaggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagtgggagcaccaactacaacccctccctcaagagtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4-61*01 +caggtgcagctgcaggagtcgggcccaggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagcagtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagtgggagcaccaactacaacccctccctcaagagtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga +>IGHV5-10-1*01 +gaagtgcagctggtgcagtctggagcagaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagctttaccagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagtgactcttataccaactacagcccgtccttccaaggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca +>IGHV5-51*01 +gaggtgcagctggtgcagtctggagcagaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagctttaccagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggtgactctgataccagatacagcccgtccttccaaggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca +>IGHV5-78*01 
+gaggtgcagctgttgcagtctgcagcagaggtgaaaagacccggggagtctctgaggatctcctgtaagacttctggatacagctttaccagctactggatccactgggtgcgccagatgcccgggaaagaactggagtggatggggagcatctatcctgggaactctgataccagatacagcccatccttccaaggccacgtcaccatctcagccgacagctccagcagcaccgcctacctgcagtggagcagcctgaaggcctcggacgccgccatgtattattgtgtgaga +>IGHV6-1*01 +caggtacagctgcagcagtcaggtccaggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctctagcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtccaagtggtataatgattatgcagtatctgtgaaaagtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga +>IGHV7-34-1*01 +ctgcagctggtgcagtctgggcctgaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttcaccatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctacactgggaacccaacgtatacccacggcttcacaggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta +>IGHV7-4-1*01 +caggtgcagctggtgcaatctgggtctgagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttcactagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaacactgggaacccaacgtatgcccagggcttcacaggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga +>IGHV7-40*01 +ttttcaatagaaaagtcaaataatctaagtgtcaatcagtggatgattagataaaatatgatatatgtaaatcatggaatactatgcagccagtatggtatgaattcagtgtgaccagcccctggacaagggcttgagtggatgggatggatcatcacctacactgggaacccaacatataccaacggcttcacaggacggtttctattctccatggacacctctgtcagcatggcgtatctgcagatcagcagcctaaaggctgaggacacggccgtgtatgactgtatgagaga +>IGHV7-81*01 +caggtgcagctggtgcagtctggccatgaggtgaagcagcctggggcctcagtgaaggtctcctgcaaggcttctggttacagtttcaccacctatggtatgaattgggtgccacaggcccctggacaagggcttgagtggatgggatggttcaacacctacactgggaacccaacatatgcccagggcttcacaggacggtttgtcttctccatggacacctctgccagcacagcatacctgcagatcagcagcctaaaggctgaggacatggccatgtattactgtgcgagata diff --git a/data/germline/human/IG_antevolo/IGKJ.fa b/data/germline/human/IG_antevolo/IGKJ.fa new file mode 100644 index 
00000000..42309e92 --- /dev/null +++ b/data/germline/human/IG_antevolo/IGKJ.fa @@ -0,0 +1,10 @@ +>IGKJ1*01 +gtggacgttcggccaagggaccaaggtggaaatcaaac +>IGKJ2*01 +tgtgcacttttggccaggggaccaagctggagatcaaac +>IGKJ3*01 +attcactttcggccctgggaccaaagtggatatcaaac +>IGKJ4*01 +gctcactttcggcggagggaccaaggtggagatcaaac +>IGKJ5*01 +gatcaccttcggccaagggacacgactggagattaaac diff --git a/data/germline/human/IG_antevolo/IGKV.fa b/data/germline/human/IG_antevolo/IGKV.fa new file mode 100644 index 00000000..88b1622a --- /dev/null +++ b/data/germline/human/IG_antevolo/IGKV.fa @@ -0,0 +1,144 @@ +>IGKV1-12*01 +gacatccagatgacccagtctccatcttccgtgtctgcatctgtaggagacagagtcaccatcacttgtcgggcgagtcagggtattagcagctggttagcctggtatcagcagaaaccagggaaagcccctaagctcctgatctatgctgcatccagtttgcaaagtggggtcccatcaaggttcagcggcagtggatctgggacagatttcactctcaccatcagcagcctgcagcctgaagattttgcaacttactattgtcaacaggctaacagtttccctcc +>IGKV1-13*01 +gccatccagttgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggcaagtcagggcattagcagtgctttagcctgatatcagcagaaaccagggaaagctcctaagctcctgatctatgatgcctccagtttggaaagtggggtcccatcaaggttcagcggcagtggatctgggacagatttcactctcaccatcagcagcctgcagcctgaagattttgcaacttattactgtcaacagtttaataattaccctca +>IGKV1-16*01 +gacatccagatgacccagtctccatcctcactgtctgcatctgtaggagacagagtcaccatcacttgtcgggcgagtcagggcattagcaattatttagcctggtttcagcagaaaccagggaaagcccctaagtccctgatctatgctgcatccagtttgcaaagtggggtcccatcaaggttcagcggcagtggatctgggacagatttcactctcaccatcagcagcctgcagcctgaagattttgcaacttattactgccaacagtataatagttaccctcc +>IGKV1-17*01 +gacatccagatgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggcaagtcagggcattagaaatgatttaggctggtatcagcagaaaccagggaaagcccctaagcgcctgatctatgctgcatccagtttgcaaagtggggtcccatcaaggttcagcggcagtggatctgggacagaattcactctcacaatcagcagcctgcagcctgaagattttgcaacttattactgtctacagcataatagttaccctcc +>IGKV1-27*01 
+gacatccagatgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggcgagtcagggcattagcaattatttagcctggtatcagcagaaaccagggaaagttcctaagctcctgatctatgctgcatccactttgcaatcaggggtcccatctcggttcagtggcagtggatctgggacagatttcactctcaccatcagcagcctgcagcctgaagatgttgcaacttattactgtcaaaagtataacagtgcccctcc +>IGKV1-33*01 +gacatccagatgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccaggcgagtcaggacattagcaactatttaaattggtatcagcagaaaccagggaaagcccctaagctcctgatctacgatgcatccaatttggaaacaggggtcccatcaaggttcagtggaagtggatctgggacagattttactttcaccatcagcagcctgcagcctgaagatattgcaacatattactgtcaacagtatgataatctccctcc +>IGKV1-37*01 +gacatccagttgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggtgagtcagggcattagcagttatttaaattggtatcggcagaaaccagggaaagttcctaagctcctgatctatagtgcatccaatttgcaatctggagtcccatctcggttcagtggcagtggatctgggacagatttcactctcactatcagcagcctgcagcctgaagatgttgcaacttattacggtcaacggacttacaatgcccctcc +>IGKV1-39*01 +gacatccagatgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggcaagtcagagcattagcagctatttaaattggtatcagcagaaaccagggaaagcccctaagctcctgatctatgctgcatccagtttgcaaagtggggtcccatcaaggttcagtggcagtggatctgggacagatttcactctcaccatcagcagtctgcaacctgaagattttgcaacttactactgtcaacagagttacagtacccctcc +>IGKV1-5*01 +gacatccagatgacccagtctccttccaccctgtctgcatctgtaggagacagagtcaccatcacttgccgggccagtcagagtattagtagctggttggcctggtatcagcagaaaccagggaaagcccctaagctcctgatctatgatgcctccagtttggaaagtggggtcccatcaaggttcagcggcagtggatctgggacagaattcactctcaccatcagcagcctgcagcctgatgattttgcaacttattactgccaacagtataatagttattctcc +>IGKV1-6*01 +gccatccagatgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggcaagtcagggcattagaaatgatttaggctggtatcagcagaaaccagggaaagcccctaagctcctgatctatgctgcatccagtttacaaagtggggtcccatcaaggttcagcggcagtggatctggcacagatttcactctcaccatcagcagcctgcagcctgaagattttgcaacttattactgtctacaagattacaattaccctcc +>IGKV1-8*01 
+gccatccggatgacccagtctccatcctcattctctgcatctacaggagacagagtcaccatcacttgtcgggcgagtcagggtattagcagttatttagcctggtatcagcaaaaaccagggaaagcccctaagctcctgatctatgctgcatccactttgcaaagtggggtcccatcaaggttcagcggcagtggatctgggacagatttcactctcaccatcagctgcctgcagtctgaagattttgcaacttattactgtcaacagtattatagttaccctcc +>IGKV1-9*01 +gacatccagttgacccagtctccatccttcctgtctgcatctgtaggagacagagtcaccatcacttgccgggccagtcagggcattagcagttatttagcctggtatcagcaaaaaccagggaaagcccctaagctcctgatctatgctgcatccactttgcaaagtggggtcccatcaaggttcagcggcagtggatctgggacagaattcactctcacaatcagcagcctgcagcctgaagattttgcaacttattactgtcaacagcttaatagttaccctcc +>IGKV1-NL1*01 +gacatccagatgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggcgagtcagggcattagcaattctttagcctggtatcagcagaaaccagggaaagcccctaagctcctgctctatgctgcatccagattggaaagtggggtcccatccaggttcagtggcagtggatctgggacggattacactctcaccatcagcagcctgcagcctgaagattttgcaacttattactgtcaacagtattatagtacccctcc +>IGKV1/OR-2*01 +gacatccagatgacccagtctccatcctccctgtctgcatctgtaggaggcagagtcaccatcacttgccgggcgagtcagggcattagcaataatttaaattggtatcagcagaaaccaaggaaaactcctaagctcctgatctatgctgcatccagtctgcaaagtgggattccctctcggttcagtgacagtggatctgggacagattacactctcaccatcagcagcctgcagcctgaagattttgcaacttattactgtcaacagagtgacagtaaccctcc +>IGKV1/OR-3*01 +gacatccagatgacccagtctccatcctccctgtctgcatctgtaggaggcagagtcaccatcacttgccgggcgagtcagggcattagcaataatttaaattggtatcagcagaaaccagggaaaactcctaagctcctgatctatgctgcatccagtctgcaaagtgggattccctctcggttcagtgacagtggatctgggacagattacactctcaccatcagcagcctgcagcctgaagattttgcagcttattactgtcaacagagtgacagtacccctcc +>IGKV1/OR-4*01 +gacatccagatgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggcgagtcagggcattagcaataatttaaattggtatcagcagaaaccagggaaaactcctaagttcctgatctatgcagcatccagtctgcaaagtgggattccctctcggttcagtgacagtggatctgggacagattacactctcaccatcagcagcctgcagcctgaagattttgcaacttattactgtcaacagagtgacagtacccctcc +>IGKV1/OR1-1*01 
+gacatccagatgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggcgagtcagggcattagcaataatttaaattggtatcagcagaaaccagggaaaactcctaagctcctgatctatgctgcatccagtctgcaaagtgggattccctctcggttcagtgacagtggatctggggcagactacactctcaccatccgcagcctgcagcctgaagattttgcaacttattactgtcaacagagtgacagtacccctcc +>IGKV1/OR10-1*01 +gacatccagatgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggcgagtcagggcattagcaataatttaaattggtatcagcagaaaccagggaaaactcctaagctcctgatctatgctgcatccagtctgcaaagtgggattccctctcggttcagtgacagtggatctgggacagattacactctcaccatcagcagcctgcagcctgaagattttgcaacttattactgtcaacagagtgacagtacctctcc +>IGKV1/OR15-118*01 +gacatccagatgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggcgagtcagggcattagcaataatttaaattggtatcagcagaaaccagggaaaactcctaagctcctgatctatgctgcacccagtctgcaaagtgggattccctctcggttcagtgacagtggatctggggcagattacactctcaccatccgcagcctgcagcctgaagattttgcaacttattagtgtcaacagagtgacagtacccctcc +>IGKV1/OR2-0*01 +gacatccagatgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggcgagtcagggcattagcaataatttaaattggtatcagcagaaaccagggaaaactcctaagctcctgatctatgctgcacccagtctgcaaagtgggattccctctcggttcagtgacagtggatctggggcagattacactctcaccatccgcagcctgcagcctgaagattttgcaacttattactgtcaacagagtgacagtacccctcc +>IGKV1/OR2-1*01 +gacatccagatgacccagtctccatcctccctgtctgcatctgtaggaggcagagtcaccatcacttgccgggcgagtcagggcattagcaataatttaaattggtatcagcagaaaccagggaaaactcctaagctcctgatctatgctgcatccagtctgcaaagtgggattccctctcggttcagtgacagtggatctggggcagattacactctcaccatcagcagcctgcagcctgaagattttgcagcttattactgtcaacagagtgacagtacccctcc +>IGKV1/OR2-108*01 +gacatccaggtgacccagtctccatcttccctgtctgcgtctgtaggagacagagtcaccatcacctgccgggcaagtcagggcattagcaatgggttatcctggtatcagcagaaaccagggcaagcccctacgctcctgatctatgctgcatccagtttgcagtcgggggtcccatctcggttcagtggcagtggatctgggacagatttcactctcaccatcagcagcctgcagcctgaagatgttgcaacttattactgtctacaggattatactaccccatt +>IGKV1/OR2-11*01 
+gacatccagatgactcagcctccatcctccctgtctgcatctgtaggagacagagccaccgtctcttgccaggctagtcaaagcatttacaactatttaaattggtatcagcagaaaccagggaaagcacctaagttcctgacctatagggcatccagtttgcagagggcgatgccatctcagttcagtggcagcggatatggaagagatttcactctcaccgtcagcagcctgcagcctgaagattttgcaacttattaatgtcaacaagagagcattttccctcc +>IGKV1/OR2-118*01 +gacatccagatgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggcgagtcagggcattagcaataatttaaattggtatcagcagaaaccagggaaaactcctaagctcctgatctatgctgcatccagtctgcaaagtgggattccctctcggttcagtgacagtggatctggggcagattacactctcaccatccgcagcctgcagcctgaagattttgcaaattattactgtcaacagagtgacagtacccctcc +>IGKV1/OR2-2*01 +gacatccagatgacccagtctccatcctccctgtctgcatctgtaggaggcagagtcaccatcacttgccgggcgagtcagggcattagcaataatttaaattggtatcagcagaaaccagggaaaactcctaagctcctgatctatgctgcatccagtctgcaaagtgggattccctctcggttcagtgacagtggatctggggcagattacactctcaccatcagcagcctgcagcctgaagattttgcagcttattactgtcaacagagtgacagtacccctcc +>IGKV1/OR2-3*01 +gacatccagatgacccagcctccatcctccctgtctgcatctgtaggagacagagtcaccgtctcttgccaggctagtcaaagcatttacaactatttaaattggtatcagcagaaaccagggaaagcacctaagttcctgacctatagggcatccagtttgcagagggggatgccatctcagttcagtggcagcggatatggaagagatttcactctcactgtcagcagcctgcagcctgaagattttgcaacttattaatgtcaacaagagagcattttccctct +>IGKV1/OR2-9*01 +gacatccagatgactcagcctccatcctccctgtctgcatctgtaggagacagagccaccgtctcttgccaggctagtcaaagcatttacaactatttaaattggtatcagcagaaaccagggaaagcacctaagttcctgacctatagggcatccagtttgcagagggcgatgccatctcagttcagtggcagcggatatggaagagatttcactctcaccgtcagcagcctgcagcctgaagattttgcaacttattaatgtcaacaagagagcattttccctcc +>IGKV1/OR22-5*01 +gacatccagatgacccagtctccatcctccctgtctgcatctgtaggaggcagagtcaccatcacttgccgggcgagtcagggcattagcaataatttaaattggtatcagcagaaaccagggaaaactcctaagcccctgatctatgctgcatccagtctgcaaagtgggattccctctcagttcagtgacagtggatctgggacagattagactctcaccatcagcagcctgcagcctgaagattttgcaacttattactgtcaacagagttacagtacccctcc +>IGKV1/OR9-1*01 
+gacatccagatgacccagtctccatcctccctgtctgcatctgtaggaggcagagtcaccatcacttgccgggtgagtcagggcattagcaataatttaaattggtatcagcagaaaccaaggaaaactcctaagctcctgatctatgctgcatccagtctgcaaagtgggattccctctcggttcagtgacagtggatctgggacagattacactctcaccatcagcagcctgcagcctgaagattttgcaacctattactgtcaacagagtgacagtaaccctcc +>IGKV1/OR9-2*01 +gacatccagatgacccagtctccatcctccctgtctgcatctgtaggaggcagagtcaccatcacttgccgggcgagtcagggcattagcaataatttaaattggtatcagcagaaaccaaggaaaactcctaagctcctgatctatgctgcatccagtctgcaaagtgggattccctctcggttcagtgacagtggatctgggacagattacactctcaccatcagcagcctgcagcctgaagattttgcaacctattactgtcaacagagtgacagtaaccctcc +>IGKV1/ORY-1*01 +gacatccagatgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggcgagtcagggcattatcaataatttaaattggtatcagaagaaaccagggaaaactcctaagctcctgatctatgctgcatccagtctgcaaagtgggattcccactcggttcagtgacagtggatctgggacagattacactcccaccatcagcagcctgcagcctgaagattttgcaacttactactgtcaacagagtgacagtacccctcc +>IGKV1D-12*01 +gacatccagatgacccagtctccatcttctgtgtctgcatctgtaggagacagagtcaccatcacttgtcgggcgagtcagggtattagcagctggttagcctggtatcagcagaaaccagggaaagcccctaagctcctgatctatgctgcatccagtttgcaaagtggggtcccatcaaggttcagcggcagtggatctgggacagatttcactctcactatcagcagcctgcagcctgaagattttgcaacttactattgtcaacaggctaacagtttccctcc +>IGKV1D-13*01 +gccatccagttgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggcaagtcagggcattagcagtgctttagcctggtatcagcagaaaccagggaaagctcctaagctcctgatctatgatgcctccagtttggaaagtggggtcccatcaaggttcagcggcagtggatctgggacagatttcactctcaccatcagcagcctgcagcctgaagattttgcaacttattactgtcaacagtttaataattaccctca +>IGKV1D-16*01 +gacatccagatgacccagtctccatcctcactgtctgcatctgtaggagacagagtcaccatcacttgtcgggcgagtcagggtattagcagctggttagcctggtatcagcagaaaccagagaaagcccctaagtccctgatctatgctgcatccagtttgcaaagtggggtcccatcaaggttcagcggcagtggatctgggacagatttcactctcaccatcagcagcctgcagcctgaagattttgcaacttattactgccaacagtataatagttaccctcc +>IGKV1D-17*01 
+aacatccagatgacccagtctccatctgccatgtctgcatctgtaggagacagagtcaccatcacttgtcgggcgaggcagggcattagcaattatttagcctggtttcagcagaaaccagggaaagtccctaagcacctgatctatgctgcatccagtttgcaaagtggggtcccatcaaggttcagcggcagtggatctgggacagaattcactctcacaatcagcagcctgcagcctgaagattttgcaacttattactgtctacagcataatagttaccctcc +>IGKV1D-33*01 +gacatccagatgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccaggcgagtcaggacattagcaactatttaaattggtatcagcagaaaccagggaaagcccctaagctcctgatctacgatgcatccaatttggaaacaggggtcccatcaaggttcagtggaagtggatctgggacagattttactttcaccatcagcagcctgcagcctgaagatattgcaacatattactgtcaacagtatgataatctccctcc +>IGKV1D-37*01 +gacatccagttgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggtgagtcagggcattagcagttatttaaattggtatcggcagaaaccagggaaagttcctaagctcctgatctatagtgcatccaatttgcaatctggagtcccatctcggttcagtggcagtggatctgggacagatttcactctcactatcagcagcctgcagcctgaagatgttgcaacttattacggtcaacggacttacaatgcccctcc +>IGKV1D-39*01 +gacatccagatgacccagtctccatcctccctgtctgcatctgtaggagacagagtcaccatcacttgccgggcaagtcagagcattagcagctatttaaattggtatcagcagaaaccagggaaagcccctaagctcctgatctatgctgcatccagtttgcaaagtggggtcccatcaaggttcagtggcagtggatctgggacagatttcactctcaccatcagcagtctgcaacctgaagattttgcaacttactactgtcaacagagttacagtacccctcc +>IGKV1D-42*01 +gacatccagatgatccagtctccatctttcctgtctgcatctgtaggagacagagtcagtatcatttgctgggcaagtgagggcattagcagtaatttagcctggtatctgcagaaaccagggaaatcccctaagctcttcctctatgatgcaaaagatttgcaccctggggtctcatcgaggttcagtggcaggggatctgggacggatttcactctcaccatcatcagcctgaagcctgaagattttgcagcttattactgtaaacaggacttcagttaccctcc +>IGKV1D-43*01 +gccatccggatgacccagtctccattctccctgtctgcatctgtaggagacagagtcaccatcacttgctgggccagtcagggcattagcagttatttagcctggtatcagcaaaaaccagcaaaagcccctaagctcttcatctattatgcatccagtttgcaaagtggggtcccatcaaggttcagcggcagtggatctgggacggattacactctcaccatcagcagcctgcagcctgaagattttgcaacttattactgtcaacagtattatagtacccctcc +>IGKV1D-8*01 
+gtcatctggatgacccagtctccatccttactctctgcatctacaggagacagagtcaccatcagttgtcggatgagtcagggcattagcagttatttagcctggtatcagcaaaaaccagggaaagcccctgagctcctgatctatgctgcatccactttgcaaagtggggtcccatcaaggttcagtggcagtggatctgggacagatttcactctcaccatcagctgcctgcagtctgaagattttgcaacttattactgtcaacagtattatagtttccctcc +>IGKV2-18*01 +gatattgtgatgacccagactccaccctccctgcccgtcaaccctggagagccggcctccatctcttgcaggtctagtcagagcctcctgcatagtaatggatatacctatttgcattggtacctgcagaagccagggcagtctccacagctcctgatttatagggtttccaatcatctttctggggtcccagacaggtttagtggcagtgggtcaggtagtgatttcacactgaaaatcagctgggtggaggctgaggatgttggggtttattactgcatgcaagctacacagtttcctaa +>IGKV2-24*01 +gatattgtgatgacccagactccactctcctcacctgtcacccttggacagccggcctccatctcctgcaggtctagtcaaagcctcgtacacagtgatggaaacacctacttgagttggcttcagcagaggccaggccagcctccaagactcctaatttataagatttctaaccggttctctggggtcccagacagattcagtggcagtggggcagggacagatttcacactgaaaatcagcagggtggaagctgaggatgtcggggtttattactgcatgcaagctacacaatttcctca +>IGKV2-28*01 +gatattgtgatgactcagtctccactctccctgcccgtcacccctggagagccggcctccatctcctgcaggtctagtcagagcctcctgcatagtaatggatacaactatttggattggtacctgcagaagccagggcagtctccacagctcctgatctatttgggttctaatcgggcctccggggtccctgacaggttcagtggcagtggatcaggcacagattttacactgaaaatcagcagagtggaggctgaggatgttggggtttattactgcatgcaagctctacaaactcctcc +>IGKV2-29*01 +gatattgtgatgacccagactccactctctctgtccgtcacccctggacagccggcctccatctcctgcaagtctagtcagagcctcctgcatagtgatggaaagacctatttgtattggtacctgcagaagccaggccagtctccacagctcctgatctatgaagtttccagccggttctctggagtgccagataggttcagtggcagcgggtcagggacagatttcacactgaaaatcagccgggtggaggctgaggatgttggggtttattactgcatgcaaggtatacaccttcctcc +>IGKV2-30*01 +gatgttgtgatgactcagtctccactctccctgcccgtcacccttggacagccggcctccatctcctgcaggtctagtcaaagcctcgtatacagtgatggaaacacctacttgaattggtttcagcagaggccaggccaatctccaaggcgcctaatttataaggtttctaaccgggactctggggtcccagacagattcagcggcagtgggtcaggcactgatttcacactgaaaatcagcagggtggaggctgaggatgttggggtttattactgcatgcaaggtacacactggcctcc +>IGKV2-4*01 
+gatattgtgatgacccagcatctgctctccctgcccatccctctgggagagccggcctccatctcctgcaggtctagtcagagcctcctgcatagtgatggaaacacctatttggattggtacctgcagaagccaggccagtctccacagcttcttatctacacaatttctaacaaattctatggagtcccaaacaagttcagtggcagcaggtcagggacaggtttcacacttaaattcagcaaagtggaggctgaggatgttggggtttattgctgtgaacagggtctgcaaggtcctca +>IGKV2-40*01 +gatattgtgatgacccagactccactctccctgcccgtcacccctggagagccggcctccatctcctgcaggtctagtcagagcctcttggatagtgatgatggaaacacctatttggactggtacctgcagaagccagggcagtctccacagctcctgatctatacgctttcctatcgggcctctggagtcccagacaggttcagtggcagtgggtcaggcactgatttcacactgaaaatcagcagggtggaggctgaggatgttggagtttattactgcatgcaacgtatagagtttccttc +>IGKV2/OR2-7D*01 +gacattctgttgacccagactccactctccctgtccatcacccccggagagccggcctccatctcctgcaggtctagtcgcagcctcctgcatagtaatggaaacacctatttacattggtagctgcagaagccaggccagcctccacagtgtctaatctgcaaggtttctaaccggttttctggggtcccagacaggttcagtggcagtgggtcgggcattgatttcacactgaaaatcagcccggtggaggctgcggatgttggggtttatattactgcatgcaagctacacactggtcccc +>IGKV2/OR22-4*01 +gacattgtgatgacccagactccactctccctgcctgtcactcctggagagccagcctccatctcctgcagatctagtgagagcctcttggatactgatgatgaatacacctatttgaattggtacctgcagaagccaggccagtctccacagctcctgatctatgaggtttccaaccgggcctctggagttccagacaggttcagtggcagtgggtcaggcactgatttcactctgaaaatcagtagggtggaggcttaggatgttggggtttattactgcatgcaagctctacaaactccgcc +>IGKV2D-18*01 +gatattgtgatgacccagactccaccctccctgcccgtcaaccctggagagccggcctccatctcctgcaggtctagtcaaagcctcctgcatagtaatggatatacctatttgcattggtacccgcagaagccagggcaatctccacagctcctgatttatagggtttccagtcgtttttctggggtcccagacaggtttagtggcagtgggtcaggcagtgatttcacactgaaaatcagctgggtggaggctgaggatgttggggtttattactgcatgcaagctacacagtttcct +>IGKV2D-24*01 +gatattgtgatgacccagactccactctcctcgcctgtcacccttggacagccggcctccatctccttcaggtctagtcaaagcctcgtacacagtgatggaaacacctacttgagttggcttcagcagaggccaggccagcctccaagactcctaatttataaggtttctaaccggttctctggggtcccagacagattcagtggcagtggggcagggacagatttcacactgaaaatcagcagggtggaagctgaggatgtcggggtttattactgcacgcaagctacacaatttcctca +>IGKV2D-26*01 
+gagattgtgatgacccagactccactctccttgtctatcacccctggagagcaggcctccatgtcctgcaggtctagtcagagcctcctgcatagtgatggatacacctatttgtattggtttctgcagaaagccaggccagtctccacgctcctgatctatgaagtttccaaccggttctctggagtgccagataggttcagtggcagcgggtcagggacagatttcacactgaaaatcagccgggtggaggctgaggattttggagtttattactgcatgcaagatgcacaagatcctcc +>IGKV2D-28*01 +gatattgtgatgactcagtctccactctccctgcccgtcacccctggagagccggcctccatctcctgcaggtctagtcagagcctcctgcatagtaatggatacaactatttggattggtacctgcagaagccagggcagtctccacagctcctgatctatttgggttctaatcgggcctccggggtccctgacaggttcagtggcagtggatcaggcacagattttacactgaaaatcagcagagtggaggctgaggatgttggggtttattactgcatgcaagctctacaaactcctcc +>IGKV2D-29*01 +gatattgtgatgacccagactccactctctctgtccgtcacccctggacagccggcctccatctcctgcaagtctagtcagagcctcctgcatagtgatggaaagacctatttgtattggtacctgcagaagccaggccagcctccacagctcctgatctatgaagtttccaaccggttctctggagtgccagataggttcagtggcagcgggtcagggacagatttcacactgaaaatcagccgggtggaggctgaggatgttggggtttattactgcatgcaaagtatacagcttcctcc +>IGKV2D-30*01 +gatgttgtgatgactcagtctccactctccctgcccgtcacccttggacagccggcctccatctcctgcaggtctagtcaaagcctcgtatacagtgatggaaacacctacttgaattggtttcagcagaggccaggccaatctccaaggcgcctaatttataaggtttctaactgggactctggggtcccagacagattcagcggcagtgggtcaggcactgatttcacactgaaaatcagcagggtggaggctgaggatgttggggtttattactgcatgcaaggtacacactggcctcc +>IGKV2D-40*01 +gatattgtgatgacccagactccactctccctgcccgtcacccctggagagccggcctccatctcctgcaggtctagtcagagcctcttggatagtgatgatggaaacacctatttggactggtacctgcagaagccagggcagtctccacagctcctgatctatacgctttcctatcgggcctctggagtcccagacaggttcagtggcagtgggtcaggcactgatttcacactgaaaatcagcagggtggaggctgaggatgttggagtttattactgcatgcaacgtatagagtttccttc +>IGKV3-11*01 +gaaattgtgttgacacagtctccagccaccctgtctttgtctccaggggaaagagccaccctctcctgcagggccagtcagagtgttagcagctacttagcctggtaccaacagaaacctggccaggctcccaggctcctcatctatgatgcatccaacagggccactggcatcccagccaggttcagtggcagtgggtctgggacagacttcactctcaccatcagcagcctagagcctgaagattttgcagtttattactgtcagcagcgtagcaactggcctcc +>IGKV3-15*01 
+gaaatagtgatgacgcagtctccagccaccctgtctgtgtctccaggggaaagagccaccctctcctgcagggccagtcagagtgttagcagcaacttagcctggtaccagcagaaacctggccaggctcccaggctcctcatctatggtgcatccaccagggccactggtatcccagccaggttcagtggcagtgggtctgggacagagttcactctcaccatcagcagcctgcagtctgaagattttgcagtttattactgtcagcagtataataactggcctcc +>IGKV3-20*01 +gaaattgtgttgacgcagtctccaggcaccctgtctttgtctccaggggaaagagccaccctctcctgcagggccagtcagagtgttagcagcagctacttagcctggtaccagcagaaacctggccaggctcccaggctcctcatctatggtgcatccagcagggccactggcatcccagacaggttcagtggcagtgggtctgggacagacttcactctcaccatcagcagactggagcctgaagattttgcagtgtattactgtcagcagtatggtagctcacctcc +>IGKV3-7*01 +gaaattgtaatgacacagtctccacccaccctgtctttgtctccaggggaaagagtcaccctctcctgcagggccagtcagagtgttagcagcagctacttaacctggtatcagcagaaacctggccaggcgcccaggctcctcatctatggtgcatccaccagggccactagcatcccagccaggttcagtggcagtgggtctgggacagacttcactctcaccatcagcagcctgcagcctgaagattttgcagtttattactgtcagcaggattataacttacctcc +>IGKV3/OR2-268*01 +gaaattgtaatgacacagtctccagccaccctgtctttgtctccaggggaaagagccaccctctcctgcagggccagtcagagtgttagcagcagctacttatcctggtaccagcagaaacctgggcaggctcccaggctcctcatctatggtgcatccaccagggccactggcatcccagccaggttcagtggtagtgggtctgggacagacttcactctcaccatcagcagcctgcagcctgaagattttgcagtttattactgtcagcaggattataacttacctcc +>IGKV3D-11*01 +gaaattgtgttgacacagtctccagccaccctgtctttgtctccaggggaaagagccaccctctcctgcagggccagtcagggtgttagcagctacttagcctggtaccagcagaaacctggccaggctcccaggctcctcatctatgatgcatccaacagggccactggcatcccagccaggttcagtggcagtgggcctgggacagacttcactctcaccatcagcagcctagagcctgaagattttgcagtttattactgtcagcagcgtagcaactggcatcc +>IGKV3D-15*01 +gaaatagtgatgacgcagtctccagccaccctgtctgtgtctccaggggaaagagccaccctctcctgcagggccagtcagagtgttagcagcaacttagcctggtaccagcagaaacctggccaggctcccaggctcctcatctatggtgcatccaccagggccactggcatcccagccaggttcagtggcagtgggtctgggacagagttcactctcaccatcagcagcctgcagtctgaagattttgcagtttattactgtcagcagtataataactggcctcc +>IGKV3D-20*01 
+gaaattgtgttgacgcagtctccagccaccctgtctttgtctccaggggaaagagccaccctctcctgcggggccagtcagagtgttagcagcagctacttagcctggtaccagcagaaacctggcctggcgcccaggctcctcatctatgatgcatccagcagggccactggcatcccagacaggttcagtggcagtgggtctgggacagacttcactctcaccatcagcagactggagcctgaagattttgcagtgtattactgtcagcagtatggtagctcacctcc +>IGKV3D-7*01 +gaaattgtaatgacacagtctccagccaccctgtctttgtctccaggggaaagagccaccctctcctgcagggccagtcagagtgttagcagcagctacttatcctggtaccagcagaaacctgggcaggctcccaggctcctcatctatggtgcatccaccagggccactggcatcccagccaggttcagtggcagtgggtctgggacagacttcactctcaccatcagcagcctgcagcctgaagattttgcagtttattactgtcagcaggattataacttacctcc +>IGKV4-1*01 +gacatcgtgatgacccagtctccagactccctggctgtgtctctgggcgagagggccaccatcaactgcaagtccagccagagtgttttatacagctccaacaataagaactacttagcttggtaccagcagaaaccaggacagcctcctaagctgctcatttactgggcatctacccgggaatccggggtccctgaccgattcagtggcagcgggtctgggacagatttcactctcaccatcagcagcctgcaggctgaagatgtggcagtttattactgtcagcaatattatagtactcctcc +>IGKV5-2*01 +gaaacgacactcacgcagtctccagcattcatgtcagcgactccaggagacaaagtcaacatctcctgcaaagccagccaagacattgatgatgatatgaactggtaccaacagaaaccaggagaagctgctattttcattattcaagaagctactactctcgttcctggaatcccacctcgattcagtggcagcgggtatggaacagattttaccctcacaattaataacatagaatctgaggatgctgcatattacttctgtctacaacatgataatttccctct +>IGKV6-21*01 +gaaattgtgctgactcagtctccagactttcagtctgtgactccaaaggagaaagtcaccatcacctgccgggccagtcagagcattggtagtagcttacactggtaccagcagaaaccagatcagtctccaaagctcctcatcaagtatgcttcccagtccttctcaggggtcccctcgaggttcagtggcagtggatctgggacagatttcaccctcaccatcaatagcctggaagctgaagatgctgcaacgtattactgtcatcagagtagtagtttacctca +>IGKV6D-21*01 +gaaattgtgctgactcagtctccagactttcagtctgtgactccaaaggagaaagtcaccatcacctgccgggccagtcagagcattggtagtagcttacactggtaccagcagaaaccagatcagtctccaaagctcctcatcaagtatgcttcccagtccttctcaggggtcccctcgaggttcagtggcagtggatctgggacagatttcaccctcaccatcaatagcctggaagctgaagatgctgcaacgtattactgtcatcagagtagtagtttacctca +>IGKV6D-41*01 
+gatgttgtgatgacacagtctccagctttcctctctgtgactccaggggagaaagtcaccatcacctgccaggccagtgaaggcattggcaactacttatactggtaccagcagaaaccagatcaagccccaaagctcctcatcaagtatgcttcccagtccatctcaggggtcccctcgaggttcagtggcagtggatctgggacagatttcacctttaccatcagtagcctggaagctgaagatgctgcaacatattactgtcagcagggcaataagcaccctca +>IGKV7-3*01 +gacattgtgctgacccagtctccagcctccttggccgtgtctccaggacagagggccaccatcacctgcagagccagtgagagtgtcagtttcttgggaataaacttaattcactggtatcagcagaaaccaggacaacctcctaaactcctgatttaccaagcatccaataaagacactggggtcccagccaggttcagcggcagtgggtctgggaccgatttcaccctcacaattaatcctgtggaagctaatgatactgcaaattattactgtctgcagagtaagaattttcctcc diff --git a/data/germline/human/IG_antevolo/IGLJ.fa b/data/germline/human/IG_antevolo/IGLJ.fa new file mode 100644 index 00000000..d74aed59 --- /dev/null +++ b/data/germline/human/IG_antevolo/IGLJ.fa @@ -0,0 +1,14 @@ +>IGLJ1*01 +ttatgtcttcggaactgggaccaaggtcaccgtcctag +>IGLJ2*01 +tgtggtattcggcggagggaccaagctgaccgtcctag +>IGLJ3*01 +tgtggtattcggcggagggaccaagctgaccgtcctag +>IGLJ4*01 +ttttgtatttggtggaggaacccagctgatcattttag +>IGLJ5*01 +ctgggtgtttggtgaggggaccgagctgaccgtcctag +>IGLJ6*01 +taatgtgttcggcagtggcaccaaggtgaccgtcctcg +>IGLJ7*01 +tgctgtgttcggaggaggcacccagctgaccgtcctcg diff --git a/data/germline/human/IG_antevolo/IGLV.fa b/data/germline/human/IG_antevolo/IGLV.fa new file mode 100644 index 00000000..210cb2e3 --- /dev/null +++ b/data/germline/human/IG_antevolo/IGLV.fa @@ -0,0 +1,92 @@ +>IGLV1-36*01 +cagtctgtgctgactcagccaccctcggtgtctgaagcccccaggcagagggtcaccatctcctgttctggaagcagctccaacatcggaaataatgctgtaaactggtaccagcagctcccaggaaaggctcccaaactcctcatctattatgatgatctgctgccctcaggggtctctgaccgattctctggctccaagtctggcacctcagcctccctggccatcagtgggctccagtctgaggatgaggctgattattactgtgcagcatgggatgacagcctgaatggtcc +>IGLV1-40*01 
+cagtctgtcgtgacgcagccgccctcagtgtctggggccccagggcagagggtcaccatctcctgcactgggagcagctccaacatcggggcaggttatgatgtacactggtaccagcagcttccaggaacagcccccaaactcctcatctatggtaacagcaatcggccctcaggggtccctgaccgattctctggctccaagtctggcacctcagcctccctggccatcactgggctccaggctgaggatgaggctgattattactgccagtcctatgacagcagcctgagtggttc +>IGLV1-41*01 +cagtctgtgttgacgcagccgccttcagtgtctgcggccccaggacagaaggtcaccatctcctgctctggaagcagctccgacatggggaattatgcggtatcctggtaccagcagctcccaggaacagcccccaaactcctcatctatgaaaataataagcgaccctcagggattcctgaccgattctctggctccaagtctggcacctcagccaccctgggcatcactggcctctggcctgaggacgaggccgattattactgcttagcatgggataccagcccgagagcttg +>IGLV1-44*01 +cagtctgtgctgactcagccaccctcagcgtctgggacccccgggcagagggtcaccatctcttgttctggaagcagctccaacatcggaagtaatactgtaaactggtaccagcagctcccaggaacggcccccaaactcctcatctatagtaataatcagcggccctcaggggtccctgaccgattctctggctccaagtctggcacctcagcctccctggccatcagtgggctccagtctgaggatgaggctgattattactgtgcagcatgggatgacagcctgaatggtcc +>IGLV1-47*01 +cagtctgtgctgactcagccaccctcagcgtctgggacccccgggcagagggtcaccatctcttgttctggaagcagctccaacatcggaagtaattatgtatactggtaccagcagctcccaggaacggcccccaaactcctcatctataggaataatcagcggccctcaggggtccctgaccgattctctggctccaagtctggcacctcagcctccctggccatcagtgggctccggtccgaggatgaggctgattattactgtgcagcatgggatgacagcctgagtggtcc +>IGLV1-50*01 +cagtctgtgctgacgcagccgccctcagtgtctggggccccagggcagagggtcaccatctcctgcactgggagcagctccaacattggggcgggttatgttgtacattggtaccagcagcttccaggaacagcccccaaactcctcatctatggtaacagcaatcggccctcaggggtccctgaccaattctctggctccaagtctggcacctcagcctccctggccatcactggactccagtctgaggatgaggctgattattactgcaaagcatgggataacagcctgaatgctca +>IGLV1-51*01 +cagtctgtgttgacgcagccgccctcagtgtctgcggccccaggacagaaggtcaccatctcctgctctggaagcagctccaacattgggaataattatgtatcctggtaccagcagctcccaggaacagcccccaaactcctcatttatgacaataataagcgaccctcagggattcctgaccgattctctggctccaagtctggcacgtcagccaccctgggcatcaccggactccagactggggacgaggccgattattactgcggaacatgggatagcagcctgagtgctgg +>IGLV1-62*01 
+cagtctgtgctgactcagccaccctcagtgtcttgggccacaaggcagaggctcactgtctcctgcactggaagcagctccaacactgggactggctataacgtaaactgttggcagtagctcccaagaactgaccccaaactcctcaggcatggtgataagaattgggcctcctgggtatctgaccaattctctggttccaagtctggcagcttggcctccctgggcaccactgggctctgggctgaggacaagactgattatcactgccagtcccgtgacatctgctgagtgcttg +>IGLV10-54*01 +caggcagggctgactcagccaccctcggtgtccaagggcttgagacagaccgccacactcacctgcactgggaacagcaacaatgttggcaaccaaggagcagcttggctgcagcagcaccagggccaccctcccaaactcctatcctacaggaataacaaccggccctcagggatctcagagagattatctgcatccaggtcaggaaacacagcctccctgaccattactggactccagcctgaggacgaggctgactattactgctcagcatgggacagcagcctcagtgctca +>IGLV11-55*01 +cggcccgtgctgactcagccgccctctctgtctgcatccccgggagcaacagccagactcccctgcaccctgagcagtgacctcagtgttggtggtaaaaacatgttctggtaccagcagaagccagggagctctcccaggttattcctgtatcactactcagactcagacaagcagctgggacctggggtccccagtcgagtctctggctccaaggagacctcaagtaacacagcgtttttgctcatctctgggctccagcctgaggacgaggccgattattactgccaggtgtacgaaagtagtgctaat +>IGLV2-11*01 +cagtctgccctgactcagcctcgctcagtgtccgggtctcctggacagtcagtcaccatctcctgcactggaaccagcagtgatgttggtggttataactatgtctcctggtaccaacagcacccaggcaaagcccccaaactcatgatttatgatgtcagtaagcggccctcaggggtccctgatcgcttctctggctccaagtctggcaacacggcctccctgaccatctctgggctccaggctgaggatgaggctgattattactgctgctcatatgcaggcagctacactttc +>IGLV2-14*01 +cagtctgccctgactcagcctgcctccgtgtctgggtctcctggacagtcgatcaccatctcctgcactggaaccagcagtgacgttggtggttataactatgtctcctggtaccaacagcacccaggcaaagcccccaaactcatgatttatgaggtcagtaatcggccctcaggggtttctaatcgcttctctggctccaagtctggcaacacggcctccctgaccatctctgggctccaggctgaggacgaggctgattattactgcagctcatatacaagcagcagcactctc +>IGLV2-18*01 +cagtctgccctgactcagcctccctccgtgtccgggtctcctggacagtcagtcaccatctcctgcactggaaccagcagtgacgttggtagttataaccgtgtctcctggtaccagcagcccccaggcacagcccccaaactcatgatttatgaggtcagtaatcggccctcaggggtccctgatcgcttctctgggtccaagtctggcaacacggcctccctgaccatctctgggctccaggctgaggacgaggctgattattactgcagctcatatacaagcagcagcactttc +>IGLV2-23*01 
+cagtctgccctgactcagcctgcctccgtgtctgggtctcctggacagtcgatcaccatctcctgcactggaaccagcagtgatgttgggagttataaccttgtctcctggtaccaacagcacccaggcaaagcccccaaactcatgatttatgagggcagtaagcggccctcaggggtttctaatcgcttctctggctccaagtctggcaacacggcctccctgacaatctctgggctccaggctgaggacgaggctgattattactgctgctcatatgcaggtagtagcactttac +>IGLV2-33*01 +caatctgccctgactcagcctccttttgtgtccggggctcctggacagtcggtcaccatctcctgcactggaaccagcagtgacgttggggattatgatcatgtcttctggtaccaaaagcgtctcagcactacctccagactcctgatttacaatgtcaatactcggccttcagggatctctgacctcttctcaggctccaagtctggcaacatggcttccctgaccatctctgggctcaagtccgaggttgaggctaattatcactgcagcttatattcaagtagttacactttc +>IGLV2-34*01 +cagtctgttctgactcagcctcgctcagtgtccaggtctcctggacagtaggttactatcttctgcactggaaccagcagtgacattgggggttatgaccttgtctcctggtgccagtagcacccaggcaaagcccccaaactcatgatttatgatgtcgctaattggccctcaggggcccctggttgcttctctggctccaagtctggcaacacggcctccctgaccatctctgggctccaggctgaggacgaggctgattattactgcagctcatatgcaggcagctacaatttc +>IGLV2-5*01 +cagtctgccctgattcagcctccctccgtgtccgggtctcctggacagtcagtcaccatctcctgcactggaaccagcagtgatgttgggagttatgactatgtctcctggtaccaacagcacccaggcacagtccccaaacccatgatctacaatgtcaatactcagccctcaggggtccctgatcgtttctctggctccaagtctggcaatacggcctccatgaccatctctggactccaggctgaggacgaggctgattattagtgctgctcatatacaagcagtgccacttaac +>IGLV2-8*01 +cagtctgccctgactcagcctccctccgcgtccgggtctcctggacagtcagtcaccatctcctgcactggaaccagcagtgacgttggtggttataactatgtctcctggtaccaacagcacccaggcaaagcccccaaactcatgatttatgaggtcagtaagcggccctcaggggtccctgatcgcttctctggctccaagtctggcaacacggcctccctgaccgtctctgggctccaggctgaggatgaggctgattattactgcagctcatatgcaggcagcaacaatttc +>IGLV2-NL1*01 +cagtctgttctgactcagcctcgctcagtgtccaggtctcctggacagtaggttactatcttctgcactggaaccagcagtgacattgggggttatgaccttgtctcctggtgccagtagcacccaggcaaagcccccaaactcatgatttatgatgtcggtaattggccctcaggggcccctggttgcttctctggctccaagtctggcaacacggcctccctgaccatctctgggctccaggctgaggacgaggctgattattactgcagctcatatgcaggcagctacaatttc +>IGLV3-1*01 
+tcctatgagctgactcagccaccctcagtgtccgtgtccccaggacagacagccagcatcacctgctctggagataaattgggggataaatatgcttgctggtatcagcagaagccaggccagtcccctgtgctggtcatctatcaagatagcaagcggccctcagggatccctgagcgattctctggctccaactctgggaacacagccactctgaccatcagcgggacccaggctatggatgaggctgactattactgtcaggcgtgggacagcagcactgca +>IGLV3-10*01 +tcctatgagctgacacagccaccctcggtgtcagtgtccccaggacaaacggccaggatcacctgctctggagatgcattgccaaaaaaatatgcttattggtaccagcagaagtcaggccaggcccctgtgctggtcatctatgaggacagcaaacgaccctccgggatccctgagagattctctggctccagctcagggacaatggccaccttgactatcagtggggcccaggtggaggatgaagctgactactactgttactcaacagacagcagtggtaatcatag +>IGLV3-12*01 +tcctatgagctgactcagccacactcagtgtcagtggccacagcacagatggccaggatcacctgtgggggaaacaacattggaagtaaagctgtgcactggtaccagcaaaagccaggccaggaccctgtgctggtcatctatagcgatagcaaccggccctcagggatccctgagcgattctctggctccaacccagggaacaccaccaccctaaccatcagcaggatcgaggctggggatgaggctgactattactgtcaggtgtgggacagtagtagtgatcatcc +>IGLV3-13*01 +tcctatgagctgacacagccacccgcggtgtcagtgtccccaggacagacagccaggatcagctgctctggagatgtactgagggataattatgctgactggtacccgcaaaagccaggccaggcccctgtgctggtgatatataaagatggtgagcggccctctggaatccctgagcgattctctgggtccacctcagggaacacaaccgccctgaccattagcagggtcctgaccaaaggcggggctgactattactgtttttctggtgattagaacaatct +>IGLV3-16*01 +tcctatgagctgacacagccaccctcggtgtcagtgtccctaggacagatggccaggatcacctgctctggagaagcattgccaaaaaaatatgcttattggtaccagcagaagccaggccagttccctgtgctggtgatatataaagacagcgagaggccctcagggatccctgagcgattctctggctccagctcagggacaatagtcacattgaccatcagtggagtccaggcagaagacgaggctgactattactgtctatcagcagacagcagtggtacttatcc +>IGLV3-19*01 +tcttctgagctgactcaggaccctgctgtgtctgtggccttgggacagacagtcaggatcacatgccaaggagacagcctcagaagctattatgcaagctggtaccagcagaagccaggacaggcccctgtacttgtcatctatggtaaaaacaaccggccctcagggatcccagaccgattctctggctccagctcaggaaacacagcttccttgaccatcactggggctcaggcggaagatgaggctgactattactgtaactcccgggacagcagtggtaaccatct +>IGLV3-21*01 
+tcctatgtgctgactcagccaccctcggtgtcagtggccccaggaaagacggccaggattacctgtgggggaaacaacattggaagtaaaagtgtgcactggtaccagcagaagccaggccaggcccctgtgctggtcgtctatgatgatagcgaccggccctcagggatccctgagcgattctctggctccaactctgggaacacggccaccctgaccatcagcagggtcgaagccggggatgaggccgactattactgtcaggtgtgggatagtagtagtgatcatcc +>IGLV3-22*01 +tcctatgagctgacacagctaccctcggtgtcagtgtccccaggacagacagccaggatcacctgctctggagatgtactgggggaaaattatgctgactggtaccagcagaagccaggccaggcccctgagttggtgatatacgaagatagtgagcggtaccctggaatccctgaacgattctctgggtccacctcagggaacacgaccaccctgaccatcagcagggtcctgaccgaagacgaggctgactattactgtttgtctggggatgaggacaatcc +>IGLV3-25*01 +tcctatgagctgacacagccaccctcggtgtcagtgtccccaggacagacggccaggatcacctgctctggagatgcattgccaaagcaatatgcttattggtaccagcagaagccaggccaggcccctgtgctggtgatatataaagacagtgagaggccctcagggatccctgagcgattctctggctccagctcagggacaacagtcacgttgaccatcagtggagtccaggcagaagatgaggctgactattactgtcaatcagcagacagcagtggtacttatcc +>IGLV3-27*01 +tcctatgagctgacacagccatcctcagtgtcagtgtctccgggacagacagccaggatcacctgctcaggagatgtactggcaaaaaaatatgctcggtggttccagcagaagccaggccaggcccctgtgctggtgatttataaagacagtgagcggccctcagggatccctgagcgattctccggctccagctcagggaccacagtcaccttgaccatcagcggggcccaggttgaggatgaggctgactattactgttactctgcggctgacaacaatct +>IGLV3-31*01 +tcctctgagctgagtcaggagcctgcagtgtctgtggccttgggatagacagccaggatcacctgccagggagacagcatagaagactccgttgtaaactggtacaagcagaagccaagccaggcccctgggctggtcatctaacttaacagtgtccagtcttcagggattcctaagaaattctctggctccagctcagggaacatggccaccctgaccatcactgggattcaggttgaagacaaggctgactattactgtcagtcatgggacagcagtcgtactcattc +>IGLV3-32*01 +tcctctgggccaactcaggtgcctgcagtgtctgtggccttgggacaaatggccaggatcacctgccagggagacagcatggaaggctcttatgaacactggtaccagcagaagccaggccaggcccccgtgctggtcatctatgatagcagtgaccggccctcaaggatccctgagcgattctctggctccaaatcaggcaacacaaccaccctgaccatcactggggcccaggctgaggatgaggctgattattactatcagttgatagacaaccatgctac +>IGLV3-9*01 
+tcctatgagctgactcagccactctcagtgtcagtggccctgggacagacggccaggattacctgtgggggaaacaacattggaagtaaaaatgtgcactggtaccagcagaagccaggccaggcccctgtgctggtcatctatagggatagcaaccggccctctgggatccctgagcgattctctggctccaactcggggaacacggccaccctgaccatcagcagagcccaagccggggatgaggctgactattactgtcaggtgtgggacagcagcactgcacaccc +>IGLV4-3*01 +ctgcctgtgctgactcagcccccgtctgcatctgccttgctgggagcctcgatcaagctcacctgcaccctaagcagtgagcacagcacctacaccatcgaatggtatcaacagagaccagggaggtccccccagtatataatgaaggttaagagtgatggcagccacagcaagggggacgggatccccgatcgcttcatgggctccagttctggggctgaccgctacctcaccttctccaacctccagtctgacgatgaggctgagtatcactgtggagagagccacacgattgatggccaagtcggttgagc +>IGLV4-60*01 +cagcctgtgctgactcaatcatcctctgcctctgcttccctgggatcctcggtcaagctcacctgcactctgagcagtgggcacagtagctacatcatcgcatggcatcagcagcagccagggaaggcccctcggtacttgatgaagcttgaaggtagtggaagctacaacaaggggagcggagttcctgatcgcttctcaggctccagctctggggctgaccgctacctcaccatctccaacctccagtttgaggatgaggctgattattactgtgagacctgggacagtaacactca +>IGLV4-69*01 +cagcttgtgctgactcaatcgccctctgcctctgcctccctgggagcctcggtcaagctcacctgcactctgagcagtgggcacagcagctacgccatcgcatggcatcagcagcagccagagaagggccctcggtacttgatgaagcttaacagtgatggcagccacagcaagggggacgggatccctgatcgcttctcaggctccagctctggggctgagcgctacctcaccatctccagcctccagtctgaggatgaggctgactattactgtcagacctggggcactggcattca +>IGLV5-37*01 +cagcctgtgctgactcagccaccttcctcctccgcatctcctggagaatccgccagactcacctgcaccttgcccagtgacatcaatgttggtagctacaacatatactggtaccagcagaagccagggagccctcccaggtatctcctgtactactactcagactcagataagggccagggctctggagtccccagccgcttctctggatccaaagatgcttcagccaatacagggattttactcatctccgggctccagtctgaggatgaggctgactattactgtatgatttggccaagcaatgcttct +>IGLV5-39*01 +cagcctgtgctgactcagccaacctccctctcagcatctcctggagcatcagccagattcacctgcaccttgcgcagtggcatcaatgttggtacctacaggatatactggtaccagcagaagccagggagtcttccccggtatctcctgaggtacaaatcagactcagataagcagcagggctctggagtccccagccgcttctctggatccaaagatgcttcaaccaatgcaggccttttactcatctctgggctccagtctgaagatgaggctgactattactgtgccatttggtacagcagcacttct +>IGLV5-45*01 
+caggctgtgctgactcagccgtcttccctctctgcatctcctggagcatcagccagtctcacctgcaccttgcgcagtggcatcaatgttggtacctacaggatatactggtaccagcagaagccagggagtcctccccagtatctcctgaggtacaaatcagactcagataagcagcagggctctggagtccccagccgcttctctggatccaaagatgcttcggccaatgcagggattttactcatctctgggctccagtctgaggatgaggctgactattactgtatgatttggcacagcagcgcttct +>IGLV5-48*01 +cagcctgtgctgactcagccaacttccctctcagcatctcctggagcatcagccagactcacctgcaccttgcgcagtggcatcaatcttggtagctacaggatattctggtaccagcagaagccagagagccctccccggtatctcctgagctactactcagactcaagtaagcatcagggctctggagtccccagccgcttctctggatccaaagatgcttcgagcaatgcagggattttagtcatctctgggctccagtctgaggatgaggctgactattactgtatgatttggcacagcagtgcttct +>IGLV5-52*01 +cagcctgtgctgactcagccatcttcccattctgcatcttctggagcatcagtcagactcacctgcatgctgagcagtggcttcagtgttggggacttctggataaggtggtaccaacaaaagccagggaaccctccccggtatctcctgtactaccactcagactccaataagggccaaggctctggagttcccagccgcttctctggatccaacgatgcatcagccaatgcagggattctgcgtatctctgggctccagcctgaggatgaggctgactattactgtggtacatggcacagcaactctaagactca +>IGLV6-57*01 +aattttatgctgactcagccccactctgtgtcggagtctccggggaagacggtaaccatctcctgcacccgcagcagtggcagcattgccagcaactatgtgcagtggtaccagcagcgcccgggcagttcccccaccactgtgatctatgaggataaccaaagaccctctggggtccctgatcggttctctggctccatcgacagctcctccaactctgcctccctcaccatctctggactgaagactgaggacgaggctgactactactgtcagtcttatgatagcagcaatca +>IGLV7-43*01 +cagactgtggtgactcaggagccctcactgactgtgtccccaggagggacagtcactctcacctgtgcttccagcactggagcagtcaccagtggttactatccaaactggttccagcagaaacctggacaagcacccagggcactgatttatagtacaagcaacaaacactcctggacccctgcccggttctcaggctccctccttgggggcaaagctgccctgacactgtcaggtgtgcagcctgaggacgaggctgagtattactgcctgctctactatggtggtgctcag +>IGLV7-46*01 +caggctgtggtgactcaggagccctcactgactgtgtccccaggagggacagtcactctcacctgtggctccagcactggagctgtcaccagtggtcattatccctactggttccagcagaagcctggccaagcccccaggacactgatttatgatacaagcaacaaacactcctggacacctgcccggttctcaggctccctccttgggggcaaagctgccctgaccctttcgggtgcgcagcctgaggatgaggctgagtattactgcttgctctcctatagtggtgctcgg +>IGLV8-61*01 
+cagactgtggtgacccaggagccatcgttctcagtgtcccctggagggacagtcacactcacttgtggcttgagctctggctcagtctctactagttactaccccagctggtaccagcagaccccaggccaggctccacgcacgctcatctacagcacaaacactcgctcttctggggtccctgatcgcttctctggctccatccttgggaacaaagctgccctcaccatcacgggggcccaggcagatgatgaatctgattattactgtgtgctgtatatgggtagtggcatttc +>IGLV8/OR8-1*01 +cagtctgtggtgacccaggagccatcactctcagggtctcctggagggacggtcacactcacctgtgccctgagctctggctcagtctctaccagtcactaccccaggtggtaccagcagaccccaggccaggctccacacatgctcatctgcagcccaaacacctgcccttctggggtccctggtcgcttctctggctccatccttgggaacaaagctgccctcaccatcacggggactcaggtagatgatgactctgatcattactgtgtgctgtacatgggtagtggcaat +>IGLV9-49*01 +cagcctgtgctgactcagccaccttctgcatcagcctccctgggagcctcggtcacactcacctgcaccctgagcagcggctacagtaattataaagtggactggtaccagcagagaccagggaagggcccccggtttgtgatgcgagtgggcactggtgggattgtgggatccaagggggatggcatccctgatcgcttctcagtcttgggctcaggcctgaatcggtacctgaccatcaagaacatccaggaagaggatgagagtgactaccactgtggggcagaccatggcagtgggagcaacttcgtgtaacc diff --git a/ig_simulator.py b/ig_simulator.py new file mode 100755 index 00000000..4371b2da --- /dev/null +++ b/ig_simulator.py @@ -0,0 +1,248 @@ +#!/usr/bin/env python2 + +import os +import sys +import init +import logging +import shutil +import ntpath + +import process_cfg +import support +import argparse +from argparse import ArgumentParser + +home_directory = os.path.abspath(os.path.dirname(os.path.realpath(__file__))) +cdr_labeler_config_dir = os.path.join(home_directory, "configs", "cdr_labeler") +vj_finder_config_dir = os.path.join(home_directory, "configs", "vj_finder") +ig_simulator_config_dir = os.path.join(home_directory, "configs", "ig_simulator") +ig_simulator_bin = os.path.join(home_directory, "build", "release", "bin", "ig_simulator") +data_annotation_dir = os.path.join(home_directory, "data/annotation") + +test_dir = os.path.join(home_directory, "ig_simulator_test") + +tool_name = "IgSimulator" + + +def CheckBinariesExistance(params, log): + if not os.path.exists(ig_simulator_bin): + log.info("ERROR: Binary files were not 
found. Please compile " + tool_name + " before running.") + sys.exit(1) + + +def TreeStrategyCorrect(tree_strategy): + return tree_strategy in ["uniform", "wide", "deep"] + + +def LociParamCorrect(loci): + return loci in ["IGH", "IGK", "IGL"] + + +def CheckParamsCorrectness(params, log): + if not LociParamCorrect(params.loci): + log.info("Loci " + params.loci + " is not recognized") + sys.exit(1) + if not TreeStrategyCorrect(params.tree_strategy): + log.info("Tree Strategy " + params.tree_strategy + " is not recognized") + sys.exit(1) + + +def SetOutputParams(params, log): + params.output_dir = os.path.abspath(params.output_dir) + params.output_config_dir = os.path.join(params.output_dir, "configs") + + +def PrepareOutputDir(params): + if os.path.exists(params.output_dir): + shutil.rmtree(params.output_dir) + os.makedirs(params.output_dir) + + +def check_positive(value): + ivalue = int(value) + if ivalue <= 0: + raise argparse.ArgumentTypeError("%s is an invalid positive int value" % value) + return ivalue + + +def parse_args(): + parser = ArgumentParser(description="== " + tool_name + ": a tool for simulating antibody repertoires, clonal lineages and trees ==", + epilog="In case you have troubles running " + tool_name + ", you can write to igtools_support@googlegroups.com." 
+ "Please provide us with igsimulator.log file from the output directory.", + add_help=False) + req_args = parser.add_argument_group("Required params") + output_args = req_args.add_mutually_exclusive_group(required=True) + output_args.add_argument("-o", "--output", + type=str, + default="", + dest="output_dir", + help="Output directory") + + output_args.add_argument("--test", + action="store_const", + const=test_dir, + dest="output_dir", + help="Running in test mode") + + optional_args = parser.add_argument_group("Optional arguments") + + optional_args.add_argument("-l", "--loci", + type=str, + default="IGH", + dest="loci", + help="Loci: IGH, IGK, IGL" # ", TRA, TRB, TRG, TRD, TR (all TCRs) or all. " + "[default: %(default)s]") + + optional_args.add_argument("-s", "--tree_strategy", + type=str, + default="deep", + dest="tree_strategy", + help="Tree strategy to use: uniform, wide, deep [default: %(default)s]") + + optional_args.add_argument("-n", "--n_metaroots", + type=check_positive, + default=10, + dest="number_of_metaroots") + + optional_args.add_argument("-h", "--help", + action="help", + help="Help message and exit") + return parser.parse_args() + + +def get_logger(): + log = logging.getLogger(tool_name) + log.setLevel(logging.DEBUG) + console = logging.StreamHandler(sys.stdout) + console.setFormatter(logging.Formatter('%(message)s')) + console.setLevel(logging.DEBUG) + log.addHandler(console) + return log + + +def add_log_handler(params, log): + # log file + params.log_filename = os.path.join(params.output_dir, "ig_simulator.log") + if os.path.exists(params.log_filename): + os.remove(params.log_filename) + log_handler = logging.FileHandler(params.log_filename, mode='a') + log.addHandler(log_handler) + return log + + +def PrintParams(params, log): + log.info(tool_name + " parameters:") + log.info(" Output directory:\t" + params.output_dir + "\n") + log.info(" Loci:\t\t\t" + params.loci) + log.info(" # of metaroots:\t\t" + str(params.number_of_metaroots) + 
"\n") + log.info(" Tree strategy:\t\t" + params.tree_strategy + "\n") + +######################################################################################################################## + + +def CopyConfigs(params, log): + if os.path.exists(params.output_config_dir): + shutil.rmtree(params.output_config_dir) + params.cdr_labeler_config_dir = os.path.abspath(os.path.join(params.output_config_dir, "cdr_labeler")) + params.cdr_labeler_config_filename = os.path.join(params.cdr_labeler_config_dir, "config.info") + + params.vj_finder_config_dir = os.path.abspath(os.path.join(params.output_config_dir, "vj_finder")) + params.vj_finder_config_filename = os.path.join(params.vj_finder_config_dir, "config.info") + + shutil.copytree(ig_simulator_config_dir, params.output_config_dir) + shutil.copytree(cdr_labeler_config_dir, params.cdr_labeler_config_dir) + shutil.copytree(vj_finder_config_dir, params.vj_finder_config_dir) + + params.output_config_file = os.path.join(params.output_config_dir, "config.info") + if not os.path.exists(params.output_config_file): + log.info("ERROR: Config file " + params.output_config_file + " was not found") + sys.exit(1) + + +def ModifyParamsWrtOrganism(params, cdr_param_dict): + params.organism = "human" + cdr_param_dict['imgt_v_annotation'] = os.path.join(data_annotation_dir, params.organism + "_v_imgt.txt") + cdr_param_dict['kabat_v_annotation'] = os.path.join(data_annotation_dir, params.organism + "_v_kabat.txt") + cdr_param_dict['imgt_j_annotation'] = os.path.join(data_annotation_dir, params.organism + "_j_imgt.txt") + cdr_param_dict['kabat_j_annotation'] = os.path.join(data_annotation_dir, params.organism + "_j_kabat.txt") + return cdr_param_dict + + +def ModifyConfigFiles(params, log): + igs_params_dict = dict() + igs_params_dict['output_dir'] = params.output_dir + igs_params_dict['loci'] = params.loci + igs_params_dict['number_of_metaroots'] = params.number_of_metaroots + igs_params_dict['pool_manager_strategy'] = 
params.tree_strategy + igs_params_dict['germline_dir'] = os.path.join(home_directory, "data/germline") + igs_params_dict['cdr_labeler_config_filename'] = params.cdr_labeler_config_filename + + cdr_params_dict = dict() + cdr_params_dict['vj_finder_config'] = params.vj_finder_config_filename + + vjf_params_dict = dict() + params.germline_config_file = os.path.join(params.vj_finder_config_dir, "germline_files_config.txt") + vjf_params_dict['germline_filenames_config'] = params.germline_config_file + vjf_params_dict['germline_dir'] = os.path.join(home_directory, "data/germline") + igs_params_dict['germline_filenames_config'] = params.germline_config_file + + cdr_params_dict = ModifyParamsWrtOrganism(params, cdr_params_dict) + process_cfg.substitute_params(params.output_config_file, igs_params_dict, log) + process_cfg.substitute_params(params.cdr_labeler_config_filename, cdr_params_dict, log) + process_cfg.substitute_params(params.vj_finder_config_filename, vjf_params_dict, log) + + +def PrepareConfigs(params, log): + CopyConfigs(params, log) + ModifyConfigFiles(params, log) + + +def RunTool(params, log): + try: + igs_command_line = ig_simulator_bin + " " + \ + params.output_config_file + support.sys_call(igs_command_line, log) + log.info("\nThank you for using " + tool_name + "!\n") + except (KeyboardInterrupt): + log.info("\n" + tool_name + " was interrupted!") + except Exception: + exc_type, exc_value, _ = sys.exc_info() + if exc_type == SystemExit: + sys.exit(exc_value) + else: + log.exception(exc_value) + log.info("\nERROR: Exception caught.") + except BaseException: + exc_type, exc_value, _ = sys.exc_info() + if exc_type == SystemExit: + sys.exit(exc_value) + else: + log.exception(exc_value) + log.info("\nERROR: Exception caught.") + + +def main(argv): + log = get_logger() + params = parse_args() + print(params) + CheckBinariesExistance(params, log) + CheckParamsCorrectness(params, log) + SetOutputParams(params, log) + + PrepareOutputDir(params) + log = 
add_log_handler(params, log) + + # print command line + command_line = "Command_line: " + command_line += " ".join(argv) + log.info(command_line + "\n") + PrintParams(params, log) + log.info("Log will be written to " + params.log_filename + "\n") + + PrepareConfigs(params, log) + + RunTool(params, log) + log.info("Log was written to " + params.log_filename) + +if __name__ == '__main__': + main(sys.argv) diff --git a/ig_simulator_manual.html b/ig_simulator_manual.html new file mode 100644 index 00000000..edd60154 --- /dev/null +++ b/ig_simulator_manual.html @@ -0,0 +1,255 @@ + + IgSimulator 2.0.alpha Manual + + + + + +

IgSimulator 2.0.alpha manual

+ +1. What is IgSimulator?
+ +2. Installation
+    2.1. Verifying your installation
+ +3. IgSimulator usage
+    3.1. Basic options
+    3.2. Advanced options
+    3.3. Examples
+    3.4. Output files
+ +4. Output file formats
+    4.1. Base repertoire fasta
+    4.2. Base repertoire info
+    4.3. Full and filtered pool fasta
+    4.4. Clonal Trees files
+ + + +5. Feedback and bug reports
+ + + +

1. What is IgSimulator?

+

+ IgSimulator is a tool for simulation of antibody repertoires, clonal lineages and clonal trees. + It performs the following steps: +

+

+ +Some vertices of a clonal tree are marked as absent to imitate the evolutionary process. + + + +

2. Installation

+ +IgSimulator has the following dependencies: + + +To build IgSimulator, type +
+    
+    make
+    
+
+ +To install IgSimulator (after the previous step) type +
+    
+    make install
+    
+
+ +If you want to install IgSimulator to a particular path $YOUR_PATH, type +
+    
+    make install prefix=$YOUR_PATH
+    
+
+ + +

2.1. Verifying your installation

+ +► To try IgSimulator, run: +

+    ./ig_simulator.py --test
+
+
+ +The test run should take no more than a few seconds. +If IgSimulator is installed successfully, you will find the following lines at the end of the log: + +
+    
+    Thank you for using IgSimulator!
+    Log was written to <your_installation_dir>/ig_simulator_test/ig_simulator.log
+    
+
+ + +

3. IgSimulator usage

+ +To run IgSimulator, type: +
+    
+    ./ig_simulator.py [options] -o <output_dir>
+    
+
+ + +

3.1. Basic options

+ +-o / --output <output_dir>
+output directory (required). + +

+ +--test
+Runs IgSimulator with default parameters and writes the results to a test directory. +The test run is equivalent to the following command line: +
+    
+    ./ig_simulator.py -o ig_simulator_test
+    
+
+ +--help
+Prints the help message and exits. + +

+ + +

3.2. Advanced options

+ +-l / --loci <str>
+Immunological locus for which V(D)J-recombination is simulated.
+Available values are IGH / IGL / IGK. +Default value is IGH. + +

+ +-n / --n_metaroots <int>
+Number of metaroots (results of V(D)J-recombinations) to simulate. +Default value is 10. + +

+ +-s / --tree_strategy <str>
+Strategy to simulate clonal trees. +Available values are deep / wide / uniform. +Default value is deep. + + + +

3.3. Examples

+To simulate 50 metaroots using the uniform clonal tree simulation strategy +and write the output to the ig_simulator_test directory, run +
+    
+    ./ig_simulator.py -n 50 -s uniform -o ig_simulator_test
+    
+
+ + + +

3.4. Output files

+IgSimulator creates a working directory (whose name is specified with the -o option) +and writes the following files and directories there: + + + + + + + + +

4. Output file formats

+ +

4.1. Base repertoire fasta

+base_repertoire.fasta contains all simulated metaroots in FASTA format. +The id of each metaroot matches the pattern forest_X_multiplicity_Y where X is the zero-based index of the metaroot (at most the value of -n minus one) +and Y is the number of trees that are simulated with this metaroot as the root. + +

4.2. Base repertoire info

+base_repertoire.info contains the following information about each simulated metaroot: + + +

4.3. Full and filtered pool fasta

+full_pool.fasta contains the full pool of all simulated sequences. +The id of each sequence matches the pattern forest_X_tree_Y_antibody_Z where + + +Due to clonal selection, a certain number of sequences are absent from the real repertoire. +filtered_pool.fasta contains sequences in the same format as full_pool.fasta. +However, the former is a subset of the latter. + +

4.4. Clonal trees files

+ +Each file in the directory trees_dir represents one simulated clonal tree in ready-to-draw dot format. +The name of each file matches the pattern forest_X_tree_Y.dot where + + +The id of each vertex is the Z defined here. +Productive/non-productive sequences are drawn as circles/rectangles. +Absent sequences (those present in full_pool.fasta but not in filtered_pool.fasta) are colored in magenta. +Additionally, each dot file contains comments about the simulated SHMs. + + + +

5. Feedback and bug reports

+Your comments, bug reports, and suggestions are very welcome. +They will help us to further improve IgSimulator. +

+If you have any trouble running IgSimulator, please send us the log file (ig_simulator.log) from the output directory. +

+Address for communications: igtools_support@googlegroups.com. diff --git a/py/ig_simulator_tools/naive_tree_simulator.py b/py/ig_simulator_tools/naive_tree_simulator.py new file mode 100644 index 00000000..dbdbb3b3 --- /dev/null +++ b/py/ig_simulator_tools/naive_tree_simulator.py @@ -0,0 +1,179 @@ +from __future__ import division +import os +import errno + +from Bio import SeqIO +from Bio.Seq import Seq +from Bio.SeqRecord import SeqRecord +import numpy as np +import random + + +def smart_makedirs(dirname): + try: + os.makedirs(dirname) + except OSError as exc: + if exc.errno != errno.EEXIST: + raise exc + + +def random_mutations(antibody, pois_p=1): + antibody = list(antibody) + n_mut = np.random.poisson(pois_p, 1)[0] + 1 # at least one mutation + mut_ind = np.random.choice(range(len(antibody)), size=n_mut, replace=False) + mut_ind = [i for i in mut_ind if antibody[i] != 'N'] + + def rand_nucl(nucl): + bases = list("ACGT") + bases.remove(nucl) + return bases[np.random.randint(3)] + + mut_ind = [(i, rand_nucl(antibody[i])) for i in mut_ind] + + for i, nucl in mut_ind: + antibody[i] = nucl + + return "".join(antibody), mut_ind + + +class ParsedRecord(object): + def __init__(self, record): + record.name = str(record.name) + self.name, self.multiplicity = [int(x) for x in record.name.split('_') if x.isdigit()] + self.metaroot_seq = str(record.seq) + + +class Node(object): + def __init__(self, seq, numb): + self.seq = seq + self.children = [] + self.numb = numb + self.included = True + + +def generate_tree(seq, n, ret_prob, pois_p): + pool = [] + root = Node(seq, 0) + pool.append(root) + indeces = [] + weights = [] + for i in xrange(n): + indeces.append(i) + weights.append(1) + index = np.random.choice(indeces, size=1, + p=np.array(weights) / sum(weights))[0] + weights[index] += 1 + e = pool[index] + mut_seq, mutations = random_mutations(e.seq, pois_p) + mut_e = Node(mut_seq, i + 1) + e.children.append((mut_e, mutations)) + bern = np.random.binomial(1, ret_prob, 1)[0] 
+ if not bern: + e.included = False + pool.append(mut_e) + + results = {'root': root} + results['all_seqs'] = [(id, x.seq) for id, x in enumerate(pool)] + results['filtered_seqs'] = [(id, x.seq) for id, x in enumerate(pool) if x.included] + results['edge_list'] = edge_list(root) + return results + + +def edge_list(root): + elist = [(root.numb, x.numb, mut) for x, mut in root.children] + for x, mut in root.children: + elist += edge_list(x) + return elist + + +def go(records, lamb, ret_prob, pois_p): + records = [ParsedRecord(record) for record in records] + results = [] + for i, record in enumerate(records): + print(i + 1, len(records)) + results.append([generate_tree(record.metaroot_seq, + np.random.geometric(lamb, size=1)[0], + ret_prob=ret_prob, + pois_p=pois_p) + for _ in xrange(record.multiplicity)]) + return results + + +def output_forests(forests, output_folder = ""): + all_records, filtered_records = [], [] + + def create_records(x): + return [SeqRecord(Seq(seq), + id="metaroot_%d_tree_%d_id_%d" % (i, m, id), + description="") + for id, seq in x] + + for i, forest in enumerate(forests): + for m, tree in enumerate(forest): + all_records += create_records(tree['all_seqs']) + filtered_records += create_records(tree['filtered_seqs']) + + smart_makedirs(output_folder) + SeqIO.write(filtered_records, os.path.join(output_folder, "filtered_records.fasta"), "fasta") + SeqIO.write(all_records, os.path.join(output_folder, "all_records.fasta"), "fasta") + + edge_lists_dir = os.path.join(output_folder, "trees_edge_lists") + smart_makedirs(edge_lists_dir) + for i, forest in enumerate(forests): + for m, tree in enumerate(forest): + with open(os.path.join(edge_lists_dir, "antibody_%d_tree_%d.txt" %(i, m)), "w") as f: + for tup in tree['edge_list']: + f.write("%d %d %s\n" % tup) + + +def ParseCommandLineParams(): + import argparse + current_dir = os.path.dirname(os.path.realpath(__file__)) + parser = argparse.ArgumentParser() + parser.add_argument("-o", "--outdir", + 
type=str, + default=current_dir, + help="Output directory") + root_dir = os.path.realpath(os.path.join(current_dir, "../../")) + input_file = os.path.join(root_dir, "ig_simulator_test/test.fa") + parser.add_argument("-i", "--input", + type=str, + default=input_file) + parser.add_argument("--seed", + type=int, + default=int(np.random.randint(low=0, high=100000, size=1)[0])) + parser.add_argument("-s", "--exp_tree_size", + type=float, + default=0.01, + help="Mean tree size") + parser.add_argument("-r", "--ret_prob", + type=float, + default=0.5, + help="Probability to return chosen antibody to the pool") + parser.add_argument("-p", "--pois_p", + type=float, + default=1., + help="Pois parameter for number of mut") + return parser.parse_args() + + +def dump_params(params): + import json + smart_makedirs(params.outdir) + with open(os.path.join(params.outdir, "params.txt"), 'w') as f: + json.dump(vars(params), f, sort_keys=True, indent=4) + + +def main(): + params = ParseCommandLineParams() + dump_params(params) + np.random.seed(params.seed) + with open(params.input, "r") as f: + records = list(SeqIO.parse(f, "fasta")) + + results = go(records, lamb=params.exp_tree_size, ret_prob=params.ret_prob, pois_p=params.pois_p) + output_forests(results, params.outdir) + + +if __name__ == "__main__": + main() diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a91655a2..16468cd5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -35,6 +35,7 @@ set(ALGORITHMS_DIR "${IGREC_MAIN_SRC_DIR}/algorithms") set(VDJ_UTILS_DIR "${IGREC_MAIN_SRC_DIR}/vdj_utils") set(VJ_FINDER_DIR "${IGREC_MAIN_SRC_DIR}/vj_finder") set(CDR_LABELER_DIR "${IGREC_MAIN_SRC_DIR}/cdr_labeler") +set(IG_SIMULATOR_DIR "${IGREC_MAIN_SRC_DIR}/ig_simulator") # Everything option-dependent include(options) @@ -89,3 +90,4 @@ add_subdirectory(cdr_labeler) add_subdirectory(test) add_subdirectory(umi_experiments) add_subdirectory(pcr_simulator) +add_subdirectory(ig_simulator) diff --git 
a/src/cdr_labeler/cdr_launch.cpp b/src/cdr_labeler/cdr_launch.cpp index a41ae25f..361f5cae 100644 --- a/src/cdr_labeler/cdr_launch.cpp +++ b/src/cdr_labeler/cdr_launch.cpp @@ -1,12 +1,13 @@ #include "cdr_launch.hpp" #include -#include "germline_db_generator.hpp" +#include "germline_utils/germline_db_generator.hpp" #include "germline_db_labeler.hpp" #include "vj_parallel_processor.hpp" #include "read_labeler.hpp" #include "cdr_output.hpp" #include "diversity_analyser.hpp" +#include "germline_utils/germline_config.hpp" //#include "cdr_annotator.hpp" namespace cdr_labeler { @@ -16,7 +17,7 @@ namespace cdr_labeler { core::ReadArchive read_archive(config_.input_params.input_reads); if(config_.vj_finder_config.io_params.output_params.output_details.fix_spaces) read_archive.FixSpacesInHeaders(); - vj_finder::GermlineDbGenerator db_generator(config_.vj_finder_config.io_params.input_params.germline_input, + germline_utils::GermlineDbGenerator db_generator(config_.vj_finder_config.io_params.input_params.germline_input, config_.vj_finder_config.algorithm_params.germline_params); INFO("Generation of DB for variable segments..."); germline_utils::CustomGeneDatabase v_db = db_generator.GenerateVariableDb(); diff --git a/src/ig_simulator/CMakeLists.txt b/src/ig_simulator/CMakeLists.txt new file mode 100644 index 00000000..a78a270f --- /dev/null +++ b/src/ig_simulator/CMakeLists.txt @@ -0,0 +1,58 @@ +project(ig_simulator CXX) + +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) +include_directories(${SPADES_MAIN_INCLUDE_DIR}) +include_directories(${CORE_DIR}) +include_directories(${VDJ_UTILS_DIR}) +include_directories(${VJ_FINDER_DIR}) +include_directories(${CDR_LABELER_DIR}) + +add_library(ig_simulator_library STATIC + ig_simulator_config.cpp + ig_simulator_launch.cpp + base_repertoire/gene_chooser/abstract_gene_chooser.cpp + base_repertoire/gene_chooser/uniform_gene_chooser.cpp + base_repertoire/gene_chooser/config_based_getter.cpp + 
base_repertoire/nucleotides_remover/abstract_nucleotides_remover.cpp + base_repertoire/nucleotides_remover/uniform_nucleotides_remover.cpp + base_repertoire/nucleotides_remover/config_based_getter.cpp + simulation_routines.cpp + base_repertoire/p_nucleotides_creator/abstract_nucleotides_creator.cpp + base_repertoire/p_nucleotides_creator/uniform_nucleotides_creator.cpp + base_repertoire/p_nucleotides_creator/config_based_getter.cpp + base_repertoire/metaroot_creator/metaroot_creator.cpp + base_repertoire/metaroot_creator/config_based_getter.cpp + base_repertoire/n_nucleotides_inserter/abstract_n_nucleotides_inserter.cpp + base_repertoire/n_nucleotides_inserter/uniform_n_nucleotides_inserter.cpp + base_repertoire/n_nucleotides_inserter/config_based_getter.cpp + base_repertoire/metaroot/metaroot.cpp + base_repertoire/productivity_checker/productivity_checker.cpp + base_repertoire/multiplicity_creator/multiplicity_creator.cpp + base_repertoire/base_repertoire_simulator.cpp + base_repertoire/metaroot_cluster/metaroot_cluster.cpp + base_repertoire/base_repertoire.cpp + clonal_trees/tree/node.cpp + clonal_trees/tree_creator/pool_manager.cpp + clonal_trees/tree_creator/cartesian_tree.cpp + clonal_trees/tree/tree.cpp + clonal_trees/forest/forest.cpp + clonal_trees/tree_creator/tree_creator.cpp + clonal_trees/tree_creator/tree_size_generator.cpp + clonal_trees/tree_creator/shm_creator.cpp + clonal_trees/tree_creator/forest_creator.cpp + clonal_trees/tree_creator/forest_storage_creator.cpp + clonal_trees/tree_creator/exporters.cpp + clonal_trees/fast_stop_codon_checker/fast_stop_codon_checker.cpp + base_repertoire/gene_chooser/custom_gene_chooser.cpp) + +target_link_libraries(ig_simulator_library + vj_finder_library + cdr_labeler_library + input + ${COMMON_LIBRARIES} + ) + + +add_executable(ig_simulator main.cpp) + +target_link_libraries(ig_simulator ig_simulator_library) diff --git a/src/ig_simulator/base_repertoire/base_repertoire.cpp 
b/src/ig_simulator/base_repertoire/base_repertoire.cpp new file mode 100644 index 00000000..79886de4 --- /dev/null +++ b/src/ig_simulator/base_repertoire/base_repertoire.cpp @@ -0,0 +1,24 @@ +// +// Created by Andrew Bzikadze on 4/2/17. +// + +#include "base_repertoire.hpp" + +namespace ig_simulator { + +void print_base_repertoire(const BaseRepertoire& base_repertoire, std::ostream& fasta, std::ostream& info) { + size_t id = 0; + for (const auto& cluster : base_repertoire) { + fasta << ">forest_" << id << "_multiplicity_" << cluster.Multiplicity() << '\n'; + fasta << cluster.MetarootPtr()->Sequence() << '\n'; + + info << "Index (zero-based): " << id << " / " << base_repertoire.size() - 1 + << " (" << base_repertoire.size() << ") " + << '\n' << *(cluster.MetarootPtr()) + <<"***************************************************************************\n\n"; + + id++; + } +} + +} // End namespapce ig_simulator diff --git a/src/ig_simulator/base_repertoire/base_repertoire.hpp b/src/ig_simulator/base_repertoire/base_repertoire.hpp new file mode 100644 index 00000000..96061b98 --- /dev/null +++ b/src/ig_simulator/base_repertoire/base_repertoire.hpp @@ -0,0 +1,15 @@ +// +// Created by Andrew Bzikadze on 4/2/17. +// + +#pragma once + +#include "metaroot_cluster/metaroot_cluster.hpp" + +namespace ig_simulator { + +using BaseRepertoire = std::vector; + +void print_base_repertoire(const BaseRepertoire& base_repertoire, std::ostream& fasta, std::ostream& info); + +} // End namespace ig_simulator diff --git a/src/ig_simulator/base_repertoire/base_repertoire_simulator.cpp b/src/ig_simulator/base_repertoire/base_repertoire_simulator.cpp new file mode 100644 index 00000000..62c86013 --- /dev/null +++ b/src/ig_simulator/base_repertoire/base_repertoire_simulator.cpp @@ -0,0 +1,31 @@ +// +// Created by Andrew Bzikadze on 3/29/17. 
+// + +#include "base_repertoire_simulator.hpp" + +namespace ig_simulator { + +BaseRepertoire BaseRepertoireSimulator::Simulate(size_t size) { + BaseRepertoire repertoire; + repertoire.reserve(size); + + size_t productive_size = static_cast (static_cast(size) * productive_part); + size_t i = 0; + while(i < productive_size) { + MetarootCluster cluster{metaroot_creator_p->Createroot(), + multiplicity_creator_p->RandomMultiplicity()}; + if (cluster.MetarootPtr()->IsProductive()) { + repertoire.emplace_back(std::move(cluster)); + i++; + } + } + + for(; i < size; ++i) { + repertoire.emplace_back(metaroot_creator_p->Createroot(), + multiplicity_creator_p->RandomMultiplicity()); + } + return repertoire; +} + +} // End namespace ig_simulator diff --git a/src/ig_simulator/base_repertoire/base_repertoire_simulator.hpp b/src/ig_simulator/base_repertoire/base_repertoire_simulator.hpp new file mode 100644 index 00000000..5660c70a --- /dev/null +++ b/src/ig_simulator/base_repertoire/base_repertoire_simulator.hpp @@ -0,0 +1,45 @@ +// +// Created by Andrew Bzikadze on 3/29/17. 
+// + +#pragma once + +#include +#include +#include + +#include "ig_simulator_config.hpp" +#include "germline_utils/germline_db_generator.hpp" +#include "metaroot_cluster/metaroot_cluster.hpp" +#include "metaroot_creator/config_based_getter.hpp" +#include "multiplicity_creator/multiplicity_creator.hpp" +#include "base_repertoire/base_repertoire.hpp" +#include "productivity_checker/productivity_checker.hpp" + +namespace ig_simulator { + +class BaseRepertoireSimulator { +private: + AbstractMetarootCreatorCPtr metaroot_creator_p; + AbstractMultiplicityCreatorPtr multiplicity_creator_p; + double productive_part; + +public: + BaseRepertoireSimulator(const IgSimulatorConfig::SimulationParams::BaseRepertoireParams& config, + const germline_utils::ChainType& chain_type, + std::vector &db): + metaroot_creator_p(get_metarootcreator(chain_type, config.metaroot_simulation_params, db)), + multiplicity_creator_p(get_multiplicity_creator(config.multiplicity_creator_params)), + productive_part(config.productive_params.productive_part) + { } + + BaseRepertoireSimulator() = delete; + BaseRepertoireSimulator(const BaseRepertoireSimulator&) = delete; + BaseRepertoireSimulator(BaseRepertoireSimulator&&) = default; + BaseRepertoireSimulator& operator=(const BaseRepertoireSimulator&) = delete; + BaseRepertoireSimulator& operator=(BaseRepertoireSimulator&&) = delete; + + BaseRepertoire Simulate(size_t size); +}; + +} // End namespace ig_simulator diff --git a/src/ig_simulator/base_repertoire/gene_chooser/abstract_gene_chooser.cpp b/src/ig_simulator/base_repertoire/gene_chooser/abstract_gene_chooser.cpp new file mode 100644 index 00000000..dc805406 --- /dev/null +++ b/src/ig_simulator/base_repertoire/gene_chooser/abstract_gene_chooser.cpp @@ -0,0 +1,28 @@ +// +// Created by Andrew Bzikadze on 3/16/17. 
+// + +#include "abstract_gene_chooser.hpp" + +namespace ig_simulator { + +AbstractVDJGeneChooser::AbstractVDJGeneChooser(const std::vector& db): + v_db_p_(db.empty() ? nullptr : &db.front()), // an empty db must reach the VERIFY below, not front()/back() on an empty vector + d_db_p_(nullptr), + j_db_p_(db.empty() ? nullptr : &db.back()), + is_vdj(false) +{ + VERIFY(db.size() >= 2 and db.size() <= 3); // db is {V, J} or {V, D, J} + + if (db.size() == 3) { + d_db_p_ = &db[1]; + is_vdj = true; + } +} + +bool AbstractVDJGeneChooser::IsVDJ() const { // not `inline`: the header declares it non-inline, so other translation units link against this definition + // if (not is_vdj) { VERIFY(d_db_p_ != nullptr); } + return is_vdj; +} + +} // End namespace ig_simulator diff --git a/src/ig_simulator/base_repertoire/gene_chooser/abstract_gene_chooser.hpp b/src/ig_simulator/base_repertoire/gene_chooser/abstract_gene_chooser.hpp new file mode 100644 index 00000000..f2578ee0 --- /dev/null +++ b/src/ig_simulator/base_repertoire/gene_chooser/abstract_gene_chooser.hpp @@ -0,0 +1,50 @@ +// +// Created by Andrew Bzikadze on 3/16/17. +// + +#pragma once + +#include +#include + +#include "germline_utils/germline_databases/custom_gene_database.hpp" +#include "ig_simulator_utils.hpp" + +namespace ig_simulator { + +using VDJ_GenesIndexTuple = std::tuple; + +class AbstractVDJGeneChooser { +protected: + const germline_utils::CustomGeneDatabase *v_db_p_; + const germline_utils::CustomGeneDatabase *d_db_p_; + const germline_utils::CustomGeneDatabase *j_db_p_; + + // This variable defines whether D segment is generated + // d_db_p_ MUST be nullptr if is_vdj == false + bool is_vdj; + +public: + explicit AbstractVDJGeneChooser(const std::vector& db); + + virtual VDJ_GenesIndexTuple ChooseGenes() const = 0; + + /** + * This method suggests whether D segment is generated. + * If `false` then second component of `VDJ_GenesIndexTuple` + * returned by `ChooseGenes()` will be size_t(-1).
+ */ + bool IsVDJ() const; + + AbstractVDJGeneChooser() = delete; + AbstractVDJGeneChooser(const AbstractVDJGeneChooser&) = delete; + AbstractVDJGeneChooser(AbstractVDJGeneChooser&&) = delete; + AbstractVDJGeneChooser& operator=(const AbstractVDJGeneChooser&) = delete; + AbstractVDJGeneChooser& operator=(AbstractVDJGeneChooser&&) = delete; + + virtual ~AbstractVDJGeneChooser() { } +}; + +using AbstractVDJGeneChooserCPtr = std::unique_ptr; + +} // End namespace ig_simulator diff --git a/src/ig_simulator/base_repertoire/gene_chooser/config_based_getter.cpp b/src/ig_simulator/base_repertoire/gene_chooser/config_based_getter.cpp new file mode 100644 index 00000000..d3f5b7cb --- /dev/null +++ b/src/ig_simulator/base_repertoire/gene_chooser/config_based_getter.cpp @@ -0,0 +1,24 @@ +// +// Created by Andrew Bzikadze on 3/31/17. +// + +#include "config_based_getter.hpp" +#include "uniform_gene_chooser.hpp" +#include "custom_gene_chooser.hpp" + + +namespace ig_simulator { + +AbstractVDJGeneChooserCPtr get_gene_chooser(const GeneChooserParams& config, + const std::vector& db) +{ + if (config.method == GeneChooserMethod::Uniform) + return AbstractVDJGeneChooserCPtr(new UniformVDJGeneChooser(db)); + // TODO add Custom + // else if (config.method == GeneChooserMethod::Custom) + // return AbstractVDJGeneChooserCPtr(new CustomGeneChooser(db, config.custom_gene_chooser_params)); + VERIFY(false); +} + +} // End namespace ig_simulator + diff --git a/src/ig_simulator/base_repertoire/gene_chooser/config_based_getter.hpp b/src/ig_simulator/base_repertoire/gene_chooser/config_based_getter.hpp new file mode 100644 index 00000000..c4efa0c7 --- /dev/null +++ b/src/ig_simulator/base_repertoire/gene_chooser/config_based_getter.hpp @@ -0,0 +1,16 @@ +// +// Created by Andrew Bzikadze on 3/31/17. 
+// + +#pragma once + +#include "abstract_gene_chooser.hpp" +#include "ig_simulator_config.hpp" + +namespace ig_simulator { + +AbstractVDJGeneChooserCPtr +get_gene_chooser(const GeneChooserParams& config, + const std::vector& db); + +} // End namespace ig_simulator diff --git a/src/ig_simulator/base_repertoire/gene_chooser/custom_gene_chooser.cpp b/src/ig_simulator/base_repertoire/gene_chooser/custom_gene_chooser.cpp new file mode 100644 index 00000000..c2d77152 --- /dev/null +++ b/src/ig_simulator/base_repertoire/gene_chooser/custom_gene_chooser.cpp @@ -0,0 +1,89 @@ +// +// Created by Andrew Bzikadze on 4/26/17. +// + +#include "custom_gene_chooser.hpp" + +namespace ig_simulator { + +std::vector +CustomGeneChooser::ReadProbabilities(const std::string& filename, + const germline_utils::CustomGeneDatabase& db) +{ + using boost::tokenizer; + using boost::escaped_list_separator; + using Tokenizer = tokenizer>; + + VERIFY(db.cbegin() + 1 == db.cend()); // the database must hold exactly one gene type + germline_utils::ImmuneGeneType igtype { *db.cbegin() }; + const germline_utils::ImmuneGeneDatabase& igdb = db.GetConstDbByGeneType(igtype); + + std::ifstream in; + in.open(filename); + VERIFY(in.is_open()); + + std::vector probs(db.size()); + std::string line; + + std::vector parsed_vector; + while (getline(in, line)) { + if (line.empty()) // reading stops at the first empty line + break; + Tokenizer tokenizer(line); + parsed_vector.assign(tokenizer.begin(), tokenizer.end()); + VERIFY(parsed_vector.size() == 2); // exactly two fields per line (gene name, probability); VERIFY like the other checks here, not assert + size_t index_of_current_gene = igdb.GetIndexByName(parsed_vector.front()); + probs[index_of_current_gene] = std::stod(parsed_vector.back()); + VERIFY(probs[index_of_current_gene] >= 0 and + probs[index_of_current_gene] <= 1); + } + in.close(); + return probs; +} + +std::discrete_distribution +CustomGeneChooser::GetDistr(const std::string& filename, + const germline_utils::CustomGeneDatabase& db) +{ + std::vector probs { ReadProbabilities(filename, db) }; + return { probs.begin(), probs.end() }; +} + +CustomGeneChooser::CustomGeneChooser(const
std::vector& db, + const std::string& v_genes_probs, + const std::string& d_genes_probs, + const std::string& j_genes_probs): + AbstractVDJGeneChooser(db), + v_distr(GetDistr(v_genes_probs, db.front())), + d_distr(), + j_distr(GetDistr(j_genes_probs, db.back())) +{ + if (db.size() == 3) { + d_distr = GetDistr(d_genes_probs, db[1]); + } +} + +CustomGeneChooser::CustomGeneChooser(const std::vector& db, + const GeneChooserParams::CustomGeneChooserParams& config): + CustomGeneChooser(db, config.v_genes_probs, config.d_genes_probs, config.j_genes_probs) +{ } + +VDJ_GenesIndexTuple CustomGeneChooser::ChooseGenes() const +{ + VDJ_GenesIndexTuple result(size_t(-1), size_t(-1), size_t(-1)); + + VERIFY(v_db_p_ != nullptr); + std::get<0>(result) = v_distr(MTSingleton::GetInstance()); + + if (is_vdj) { + VERIFY(d_db_p_ != nullptr); + std::get<1>(result) = d_distr(MTSingleton::GetInstance()); + } + + VERIFY(j_db_p_ != nullptr); + std::get<2>(result) = j_distr(MTSingleton::GetInstance()); + + return result; +} + +} // End namespace ig_simulator \ No newline at end of file diff --git a/src/ig_simulator/base_repertoire/gene_chooser/custom_gene_chooser.hpp b/src/ig_simulator/base_repertoire/gene_chooser/custom_gene_chooser.hpp new file mode 100644 index 00000000..912e0958 --- /dev/null +++ b/src/ig_simulator/base_repertoire/gene_chooser/custom_gene_chooser.hpp @@ -0,0 +1,40 @@ +// +// Created by Andrew Bzikadze on 4/26/17. 
+// + +#pragma once + +#include +#include "random_generator.hpp" +#include "abstract_gene_chooser.hpp" +#include +#include + +namespace ig_simulator { + +class CustomGeneChooser final : public AbstractVDJGeneChooser { +private: + mutable std::discrete_distribution v_distr; + mutable std::discrete_distribution d_distr; + mutable std::discrete_distribution j_distr; + +private: + static std::vector ReadProbabilities(const std::string& filename, + const germline_utils::CustomGeneDatabase& db); + + static std::discrete_distribution GetDistr(const std::string& filename, + const germline_utils::CustomGeneDatabase& db); + +public: + CustomGeneChooser(const std::vector& db, + const std::string& v_genes_probs, + const std::string& d_genes_probs, + const std::string& j_genes_probs); + + CustomGeneChooser(const std::vector& db, + const GeneChooserParams::CustomGeneChooserParams& config); + + VDJ_GenesIndexTuple ChooseGenes() const override; +}; + +} // End namespace ig_simulator diff --git a/src/ig_simulator/base_repertoire/gene_chooser/uniform_gene_chooser.cpp b/src/ig_simulator/base_repertoire/gene_chooser/uniform_gene_chooser.cpp new file mode 100644 index 00000000..2d4cee84 --- /dev/null +++ b/src/ig_simulator/base_repertoire/gene_chooser/uniform_gene_chooser.cpp @@ -0,0 +1,27 @@ +// +// Created by Andrew Bzikadze on 3/17/17. 
+// + +#include "simulation_routines.hpp" +#include "uniform_gene_chooser.hpp" + +namespace ig_simulator { + +VDJ_GenesIndexTuple UniformVDJGeneChooser::ChooseGenes() const { + VDJ_GenesIndexTuple result(size_t(-1), size_t(-1), size_t(-1)); + + VERIFY(v_db_p_ != nullptr); + std::get<0>(result) = random_index(0, v_db_p_->size() - 1); + + if (is_vdj) { + VERIFY(d_db_p_ != nullptr); + std::get<1>(result) = random_index(0, d_db_p_->size() - 1); + } + + VERIFY(j_db_p_ != nullptr); + std::get<2>(result) = random_index(0, j_db_p_->size() - 1); + + return result; +} + +} // End namespace ig_simulator diff --git a/src/ig_simulator/base_repertoire/gene_chooser/uniform_gene_chooser.hpp b/src/ig_simulator/base_repertoire/gene_chooser/uniform_gene_chooser.hpp new file mode 100644 index 00000000..188df559 --- /dev/null +++ b/src/ig_simulator/base_repertoire/gene_chooser/uniform_gene_chooser.hpp @@ -0,0 +1,22 @@ +// +// Created by Andrew Bzikadze on 3/17/17. +// + +#pragma once + +#include + +#include "abstract_gene_chooser.hpp" + +namespace ig_simulator { + +class UniformVDJGeneChooser : public AbstractVDJGeneChooser { +public: + explicit UniformVDJGeneChooser(const std::vector& db): + AbstractVDJGeneChooser(db) + { } + + VDJ_GenesIndexTuple ChooseGenes() const override; +}; + +} // End namespace ig_simulator diff --git a/src/ig_simulator/base_repertoire/metaroot/metaroot.cpp b/src/ig_simulator/base_repertoire/metaroot/metaroot.cpp new file mode 100644 index 00000000..6d411a0f --- /dev/null +++ b/src/ig_simulator/base_repertoire/metaroot/metaroot.cpp @@ -0,0 +1,188 @@ +// +// Created by Andrew Bzikadze on 3/22/17. 
+// + +#include "metaroot.hpp" +#include "convert.hpp" +#include +#include "seqan/sequence.h" + +namespace ig_simulator { + +inline +AbstractMetaroot::AbstractMetaroot(const germline_utils::CustomGeneDatabase *v_db_p, + const germline_utils::CustomGeneDatabase *j_db_p, + const size_t v_ind, + const size_t j_ind, + const annotation_utils::CDRLabeling& cdr_labeling, + int cleavage_v, + int cleavage_j) : + v_db_p(check_pointer(v_db_p)), + j_db_p(check_pointer(j_db_p)), + v_ind(v_ind), + j_ind(j_ind), + cleavage_v(cleavage_v), + cleavage_j(cleavage_j), + cdr_labeling(cdr_labeling) +{ + VERIFY(v_ind < v_db_p->size()); + VERIFY(j_ind < j_db_p->size()); +} + +void AbstractMetaroot::PrepareGene(seqan::Dna5String& gene, int left_cleavage, int right_cleavage) { + VERIFY(static_cast(abs(left_cleavage)) <= seqan::length(gene)); + if (left_cleavage > 0) { + gene = seqan::suffix(gene, left_cleavage); + } else if (left_cleavage < 0) { + seqan::Dna5String pal = seqan::prefix(gene, -left_cleavage); + seqan::reverseComplement(pal); + pal += gene; + gene = pal; + } + VERIFY(static_cast(abs(right_cleavage)) <= seqan::length(gene)); + if (right_cleavage > 0) { + gene = seqan::prefix(gene, seqan::length(gene) - right_cleavage); + } else if (right_cleavage < 0) { + seqan::Dna5String pal = seqan::suffix(gene, seqan::length(gene) + right_cleavage); + seqan::reverseComplement(pal); + gene += pal; + } +} + +VJMetaroot::VJMetaroot(const germline_utils::CustomGeneDatabase *v_db_p, + const germline_utils::CustomGeneDatabase *j_db_p, + const size_t v_ind, + const size_t j_ind, + const annotation_utils::CDRLabeling &cdr_labeling, + int cleavage_v, + int cleavage_j, + seqan::Dna5String insertion_vj) : + AbstractMetaroot(v_db_p, j_db_p, v_ind, j_ind, cdr_labeling, cleavage_v, cleavage_j), + insertion_vj(insertion_vj) +{ + CalculateSequence(); +} + +const std::string& VJMetaroot::Sequence() const { return sequence; } + +void VJMetaroot::CalculateSequence() { + VERIFY(v_db_p != nullptr); + 
VERIFY(j_db_p != nullptr); + + seqan::Dna5String v_gene = (*v_db_p)[v_ind].seq(); + seqan::Dna5String j_gene = (*j_db_p)[j_ind].seq(); + + PrepareGene(v_gene, 0, cleavage_v); + PrepareGene(j_gene, cleavage_j, 0); + + sequence = core::seqan_string_to_string(v_gene); + sequence += core::seqan_string_to_string(insertion_vj); + sequence += core::seqan_string_to_string(j_gene); +} + +VDJMetaroot::VDJMetaroot(const germline_utils::CustomGeneDatabase *v_db_p, + const germline_utils::CustomGeneDatabase *d_db_p, + const germline_utils::CustomGeneDatabase *j_db_p, + const size_t v_ind, + const size_t d_ind, + const size_t j_ind, + const annotation_utils::CDRLabeling& cdr_labeling, + int cleavage_v, + int cleavage_d_left, + int cleavage_d_right, + int cleavage_j, + const seqan::Dna5String& insertion_vd, + const seqan::Dna5String& insertion_dj) : + AbstractMetaroot(v_db_p, j_db_p, v_ind, j_ind, cdr_labeling, cleavage_v, cleavage_j), + d_db_p(check_pointer(d_db_p)), + d_ind(d_ind), + cleavage_d_left(cleavage_d_left), + cleavage_d_right(cleavage_d_right), + insertion_vd(insertion_vd), + insertion_dj(insertion_dj) +{ + VERIFY(d_ind < d_db_p->size()); + CalculateSequence(); +} + +const std::string& VDJMetaroot::Sequence() const { return sequence; } + +void VDJMetaroot::CalculateSequence() { + VERIFY(v_db_p != nullptr); + VERIFY(d_db_p != nullptr); + VERIFY(j_db_p != nullptr); + + seqan::Dna5String v_gene = (*v_db_p)[v_ind].seq(); + seqan::Dna5String d_gene = (*d_db_p)[d_ind].seq(); + seqan::Dna5String j_gene = (*j_db_p)[j_ind].seq(); + + PrepareGene(v_gene, 0, cleavage_v); + PrepareGene(d_gene, cleavage_d_left, cleavage_d_right); + PrepareGene(j_gene, cleavage_j, 0); + + sequence = core::seqan_string_to_string(v_gene); + sequence += core::seqan_string_to_string(insertion_vd); + sequence += core::seqan_string_to_string(d_gene); + sequence += core::seqan_string_to_string(insertion_dj); + sequence += core::seqan_string_to_string(j_gene); +} + +void VJMetaroot::print(std::ostream& 
out) const { + out << "VJMetaroot:\n\n" << + + "V gene name: " << (*V_DB_P())[V_Ind()].name() << "\n" << + "J gene name: " << (*J_DB_P())[J_Ind()].name() << "\n\n" << + + "V gene: " << (*V_DB_P())[V_Ind()].seq() << "\n" << + "J gene: " << (*J_DB_P())[J_Ind()].seq() << "\n\n" << + + "Cleavage in V gene: " << CleavageV() << "\n" << + "Cleavage in J gene: " << CleavageJ() << "\n\n" << + + "Insertion in VJ junction: " << InsertionVJ() << "\n\n" << + + "CDR1 positions: " << CDRLabeling().cdr1.start_pos << " " << CDRLabeling().cdr1.end_pos << "\n" << + "CDR2 positions: " << CDRLabeling().cdr2.start_pos << " " << CDRLabeling().cdr2.end_pos << "\n" << + "CDR3 positions: " << CDRLabeling().cdr3.start_pos << " " << CDRLabeling().cdr3.end_pos << "\n\n" << + + "CDR1: " << sequence.substr(CDRLabeling().cdr1.start_pos, + CDRLabeling().cdr1.end_pos - CDRLabeling().cdr1.start_pos + 1) << "\n" << + "CDR2: " << sequence.substr(CDRLabeling().cdr2.start_pos, + CDRLabeling().cdr2.end_pos - CDRLabeling().cdr2.start_pos + 1) << "\n" << + "CDR3: " << sequence.substr(CDRLabeling().cdr3.start_pos, + CDRLabeling().cdr3.end_pos - CDRLabeling().cdr3.start_pos + 1) << "\n"; +} + +void VDJMetaroot::print(std::ostream& out) const { + out << "VDJMetaroot:\n\n" << + + "V gene name: " << (*V_DB_P())[V_Ind()].name() << "\n" << + "D gene name: " << (*D_DB_P())[D_Ind()].name() << "\n" << + "J gene name: " << (*J_DB_P())[J_Ind()].name() << "\n\n" << + + "V gene: " << (*V_DB_P())[V_Ind()].seq() << "\n" << + "D gene: " << (*D_DB_P())[D_Ind()].seq() << "\n" << + "J gene: " << (*J_DB_P())[J_Ind()].seq() << "\n\n" << + + "Cleavage in V gene: " << CleavageV() << "\n" << + "Cleavage in D gene (left): " << CleavageDLeft() << "\n" << + "Cleavage in D gene (right): " << CleavageDRight() << "\n" << + "Cleavage in J gene: " << CleavageJ() << "\n\n" << + + "Insertion in VD junction: " << InsertionVD() << "\n" << + "Insertion in DJ junction: " << InsertionDJ() << "\n\n" << + + "CDR1 positions: " << 
CDRLabeling().cdr1.start_pos << " " << CDRLabeling().cdr1.end_pos << "\n" << + "CDR2 positions: " << CDRLabeling().cdr2.start_pos << " " << CDRLabeling().cdr2.end_pos << "\n" << + "CDR3 positions: " << CDRLabeling().cdr3.start_pos << " " << CDRLabeling().cdr3.end_pos << "\n\n" << + + + "CDR1: " << sequence.substr(CDRLabeling().cdr1.start_pos, + CDRLabeling().cdr1.end_pos - CDRLabeling().cdr1.start_pos + 1) << "\n" << + "CDR2: " << sequence.substr(CDRLabeling().cdr2.start_pos, + CDRLabeling().cdr2.end_pos - CDRLabeling().cdr2.start_pos + 1) << "\n" << + "CDR3: " << sequence.substr(CDRLabeling().cdr3.start_pos, + CDRLabeling().cdr3.end_pos - CDRLabeling().cdr3.start_pos + 1) << "\n"; +} + +} // End namespace ig_simulator diff --git a/src/ig_simulator/base_repertoire/metaroot/metaroot.hpp b/src/ig_simulator/base_repertoire/metaroot/metaroot.hpp new file mode 100644 index 00000000..a8612c73 --- /dev/null +++ b/src/ig_simulator/base_repertoire/metaroot/metaroot.hpp @@ -0,0 +1,148 @@ +// +// Created by Andrew Bzikadze on 3/22/17. 
+// + +#pragma once + +#include "gtest/gtest_prod.h" +#include "germline_utils/germline_db_generator.hpp" +#include "annotation_utils/cdr_labeling_primitives.hpp" +#include "ig_simulator_utils.hpp" + +namespace ig_simulator { + +class AbstractMetaroot { + friend class IgSimulatorTest; + FRIEND_TEST(IgSimulatorTest, PrepareGeneTest); + +protected: + const germline_utils::CustomGeneDatabase * v_db_p; + const germline_utils::CustomGeneDatabase * j_db_p; + + const size_t v_ind; + const size_t j_ind; + + // Negative cleavage means palindrome + const int cleavage_v; + const int cleavage_j; + + const annotation_utils::CDRLabeling cdr_labeling; + + std::string sequence; + + bool productive = true; + +protected: + static void PrepareGene(seqan::Dna5String& gene, int left_cleavage, int right_cleavage); + virtual void CalculateSequence() = 0; + virtual void print(std::ostream& out) const = 0; + +public: + AbstractMetaroot(const germline_utils::CustomGeneDatabase *v_db_p, + const germline_utils::CustomGeneDatabase *j_db_p, + const size_t v_ind, + const size_t j_ind, + const annotation_utils::CDRLabeling& cdr_labeling, + int cleavage_v, + int cleavage_j); + + const germline_utils::CustomGeneDatabase *V_DB_P() const { return v_db_p; } + const germline_utils::CustomGeneDatabase *J_DB_P() const { return j_db_p; } + + size_t V_Ind() const { return v_ind; } + size_t J_Ind() const { return j_ind; } + int CleavageV() const { return cleavage_v; } + int CleavageJ() const { return cleavage_j; } + + const annotation_utils::CDRLabeling CDRLabeling() const { return cdr_labeling; } + + size_t Length() const { return sequence.size(); } + + virtual const std::string& Sequence() const = 0; + + bool IsProductive() const { return productive; } + void SetNonProductive() { productive = false; } + + friend std::ostream& operator<<(std::ostream& out, const AbstractMetaroot& root) { + root.print(out); + return out; + } + + AbstractMetaroot() = delete; + AbstractMetaroot(const AbstractMetaroot&) = 
default; + AbstractMetaroot(AbstractMetaroot&&) = default; + AbstractMetaroot& operator=(const AbstractMetaroot&) = delete; + AbstractMetaroot& operator=(AbstractMetaroot&&) = delete; + + virtual ~AbstractMetaroot() { } +}; + +using AbstractMetarootCPtr = std::unique_ptr; + + +class VJMetaroot final: public AbstractMetaroot { +private: + const seqan::Dna5String insertion_vj; + void CalculateSequence() override; + void print(std::ostream&) const override; + +public: + VJMetaroot(const germline_utils::CustomGeneDatabase *v_db_p, + const germline_utils::CustomGeneDatabase *j_db_p, + const size_t v_ind, + const size_t j_ind, + const annotation_utils::CDRLabeling &cdr_labeling, + int cleavage_v, + int cleavage_j, + seqan::Dna5String insertion_vj = ""); + + const seqan::Dna5String& InsertionVJ() const { return insertion_vj; } + + const std::string& Sequence() const override; +}; + + +class VDJMetaroot final: public AbstractMetaroot { +private: + const germline_utils::CustomGeneDatabase * d_db_p; + + const size_t d_ind; + + // Negative cleavage means palindrome + const int cleavage_d_left; + const int cleavage_d_right; + + const seqan::Dna5String insertion_vd; + const seqan::Dna5String insertion_dj; + +private: + void print(std::ostream&) const override; + void CalculateSequence() override; + +public: + VDJMetaroot(const germline_utils::CustomGeneDatabase *v_db_p, + const germline_utils::CustomGeneDatabase *d_db_p, + const germline_utils::CustomGeneDatabase *j_db_p, + const size_t v_ind, + const size_t d_ind, + const size_t j_ind, + const annotation_utils::CDRLabeling& cdr_labeling, + int cleavage_v, + int cleavage_d_left, + int cleavage_d_right, + int cleavage_j, + const seqan::Dna5String& insertion_vd = "", + const seqan::Dna5String& insertion_dj = ""); + + const germline_utils::CustomGeneDatabase *D_DB_P() const { return d_db_p; } + + size_t D_Ind() const { return d_ind; } + int CleavageDLeft() const { return cleavage_d_left; } + int CleavageDRight() const { return 
cleavage_d_right; } + const seqan::Dna5String& InsertionVD() const { return insertion_vd; } + const seqan::Dna5String& InsertionDJ() const { return insertion_dj; } + + const std::string& Sequence() const override; +}; + +} // End namespace ig_simulator diff --git a/src/ig_simulator/base_repertoire/metaroot_cluster/metaroot_cluster.cpp b/src/ig_simulator/base_repertoire/metaroot_cluster/metaroot_cluster.cpp new file mode 100644 index 00000000..bd839703 --- /dev/null +++ b/src/ig_simulator/base_repertoire/metaroot_cluster/metaroot_cluster.cpp @@ -0,0 +1,5 @@ +// +// Created by Andrew Bzikadze on 4/2/17. +// + +#include "metaroot_cluster.hpp" diff --git a/src/ig_simulator/base_repertoire/metaroot_cluster/metaroot_cluster.hpp b/src/ig_simulator/base_repertoire/metaroot_cluster/metaroot_cluster.hpp new file mode 100644 index 00000000..11428809 --- /dev/null +++ b/src/ig_simulator/base_repertoire/metaroot_cluster/metaroot_cluster.hpp @@ -0,0 +1,32 @@ +// +// Created by Andrew Bzikadze on 4/2/17. 
+// + +#pragma once + +#include "base_repertoire/metaroot/metaroot.hpp" + +namespace ig_simulator { + +class MetarootCluster { +private: + AbstractMetarootCPtr metaroot_p; + size_t multiplicity; + +public: + MetarootCluster(AbstractMetarootCPtr&& metaroot_p, + const size_t multiplicity): + metaroot_p(std::move(metaroot_p)), + multiplicity(multiplicity) + { } + + MetarootCluster(const MetarootCluster&) = delete; + MetarootCluster(MetarootCluster&&) = default; + MetarootCluster& operator=(const MetarootCluster&) = delete; + MetarootCluster& operator=(MetarootCluster&&) = delete; + + const AbstractMetarootCPtr& MetarootPtr() const { return metaroot_p; } + size_t Multiplicity() const { return multiplicity; } +}; + +} // End namespace ig_simulator diff --git a/src/ig_simulator/base_repertoire/metaroot_creator/config_based_getter.cpp b/src/ig_simulator/base_repertoire/metaroot_creator/config_based_getter.cpp new file mode 100644 index 00000000..87b1584f --- /dev/null +++ b/src/ig_simulator/base_repertoire/metaroot_creator/config_based_getter.cpp @@ -0,0 +1,20 @@ +// +// Created by Andrew Bzikadze on 3/31/17. +// + +#include "config_based_getter.hpp" + + +namespace ig_simulator { + +AbstractMetarootCreatorCPtr get_metarootcreator(const germline_utils::ChainType chain_type, + const MetarootSimulationParams& config, + std::vector& db) +{ + if (chain_type.IsVDJ()) + return AbstractMetarootCreatorCPtr(new VDJMetarootCreator(config, db)); + return AbstractMetarootCreatorCPtr(new VJMetarootCreator(config, db)); +} + +} // End namespace ig_simulator + diff --git a/src/ig_simulator/base_repertoire/metaroot_creator/config_based_getter.hpp b/src/ig_simulator/base_repertoire/metaroot_creator/config_based_getter.hpp new file mode 100644 index 00000000..8547ec05 --- /dev/null +++ b/src/ig_simulator/base_repertoire/metaroot_creator/config_based_getter.hpp @@ -0,0 +1,15 @@ +// +// Created by Andrew Bzikadze on 3/31/17. 
+// + +#pragma once + +#include "metaroot_creator.hpp" + +namespace ig_simulator { + +AbstractMetarootCreatorCPtr get_metarootcreator(const germline_utils::ChainType chain_type, + const MetarootSimulationParams& config, + std::vector& db); + +} // End namespace ig_simulator diff --git a/src/ig_simulator/base_repertoire/metaroot_creator/metaroot_creator.cpp b/src/ig_simulator/base_repertoire/metaroot_creator/metaroot_creator.cpp new file mode 100644 index 00000000..cfc74229 --- /dev/null +++ b/src/ig_simulator/base_repertoire/metaroot_creator/metaroot_creator.cpp @@ -0,0 +1,160 @@ +// +// Created by Andrew Bzikadze on 3/20/17. +// + +#include "metaroot_creator.hpp" + +#include + +#include +#include "random_generator.hpp" +#include "annotation_utils/cdr_labeling_primitives.hpp" + +#include + +namespace ig_simulator { + +AbstractMetarootCreator::AbstractMetarootCreator(const MetarootSimulationParams& config, + std::vector& db, + AbstractVDJGeneChooserCPtr&& gene_chooser): + v_db_p(&db.front()), + j_db_p(&db.back()), + prob_cleavage_v(check_probability(config.cleavage_params.prob_cleavage_v)), + prob_cleavage_j(check_probability(config.cleavage_params.prob_cleavage_j)), + gene_chooser_p(std::move(gene_chooser)), + nucl_remover_p(get_nucleotides_remover(config.nucleotides_remover_params)), + nucl_creator_p(get_nucleotides_creator(config.p_nucleotides_creator_params)), + nucl_inserter_p(get_nucleotides_inserter(config.n_nucleotides_inserter_params)), + v_cdr_db(cdr_labeler::GermlineDbLabeler(db.front(), config.cdr_labeler_config.cdrs_params).ComputeLabeling()), + j_cdr_db(cdr_labeler::GermlineDbLabeler(db.back(), config.cdr_labeler_config.cdrs_params).ComputeLabeling()), + productivity_checker() +{ + VERIFY(db.size() >= 2); + VERIFY(v_db_p->size() > 0); + VERIFY(j_db_p->size() > 0); +} + +VJMetarootCreator::VJMetarootCreator(const MetarootSimulationParams& config, + std::vector& db): + AbstractMetarootCreator(config, db, get_gene_chooser(config.gene_chooser_params, db)) 
+{ + VERIFY(db.size() == 2); +} + +AbstractMetarootCPtr VJMetarootCreator::Createroot() const { + auto genes_ind = gene_chooser_p->ChooseGenes(); + VERIFY(std::get<1>(genes_ind) == size_t(-1)); + + bool is_cleavage_v = std::bernoulli_distribution(prob_cleavage_v)(MTSingleton::GetInstance()); + bool is_cleavage_j = std::bernoulli_distribution(prob_cleavage_j)(MTSingleton::GetInstance()); + + int cleavage_v = is_cleavage_v ? + static_cast(nucl_remover_p->RemoveInVGene()) : + -static_cast(nucl_creator_p->CreateInVGene()); + + int cleavage_j = is_cleavage_j ? + static_cast(nucl_remover_p->RemoveInJGene()) : + -static_cast(nucl_creator_p->CreateInJGene()); + + seqan::Dna5String vj_insertion(nucl_inserter_p->GetVJInsertion()); + + const auto& v_gene = (*v_db_p)[std::get<0>(genes_ind)]; + const auto& j_gene = (*j_db_p)[std::get<2>(genes_ind)]; + + annotation_utils::CDRLabeling cdr_labeling(v_cdr_db.GetLabelingByGene(v_gene)); + annotation_utils::CDRLabeling j_gene_cdr_labeling(j_cdr_db.GetLabelingByGene(j_gene)); + + if (not v_cdr_db.CDRLabelingIsEmpty(v_gene) and not j_cdr_db.CDRLabelingIsEmpty(j_gene)) { + long long cdr3_end = static_cast(v_gene.length()) + + -cleavage_v + + static_cast(seqan::length(vj_insertion)) + + -cleavage_j + + static_cast(j_gene_cdr_labeling.cdr3.end_pos); + VERIFY(cdr3_end >= 0); + cdr_labeling.cdr3.end_pos = static_cast(cdr3_end); + } + + VJMetaroot metaroot { v_db_p, j_db_p, + std::get<0>(genes_ind), std::get<2>(genes_ind), + cdr_labeling, + cleavage_v, cleavage_j, + vj_insertion }; + if (not productivity_checker.IsProductive(metaroot)) { + metaroot.SetNonProductive(); + } + return AbstractMetarootCPtr(new VJMetaroot(std::move(metaroot))); +} + +VDJMetarootCreator::VDJMetarootCreator(const MetarootSimulationParams& config, + std::vector& db): + AbstractMetarootCreator(config, db, get_gene_chooser(config.gene_chooser_params, db)), + d_db_p(&db.at(1)), + prob_cleavage_d_left(check_probability(config.cleavage_params.prob_cleavage_d_left)), + 
prob_cleavage_d_right(check_probability(config.cleavage_params.prob_cleavage_d_right)) +{ + VERIFY(db.size() == 3); + VERIFY(d_db_p->size() > 0); +} + +AbstractMetarootCPtr VDJMetarootCreator::Createroot() const { + auto genes_ind = gene_chooser_p->ChooseGenes(); + + bool is_cleavage_v = std::bernoulli_distribution(prob_cleavage_v)(MTSingleton::GetInstance()); + bool is_cleavage_d_left = std::bernoulli_distribution(prob_cleavage_d_left)(MTSingleton::GetInstance()); + bool is_cleavage_d_right = std::bernoulli_distribution(prob_cleavage_d_right)(MTSingleton::GetInstance()); + bool is_cleavage_j = std::bernoulli_distribution(prob_cleavage_j)(MTSingleton::GetInstance()); + + int cleavage_v = is_cleavage_v ? + static_cast(nucl_remover_p->RemoveInVGene()) : + -static_cast(nucl_creator_p->CreateInVGene()); + + int cleavage_d_left = is_cleavage_d_left ? + static_cast(nucl_remover_p->RemoveInDGeneLeft()) : + -static_cast(nucl_creator_p->CreateInDGeneLeft()); + + int cleavage_d_right = is_cleavage_d_right ? + static_cast(nucl_remover_p->RemoveInDGeneRight()) : + -static_cast(nucl_creator_p->CreateInDGeneRight()); + + int cleavage_j = is_cleavage_j ? 
+ static_cast(nucl_remover_p->RemoveInJGene()) : + -static_cast(nucl_creator_p->CreateInJGene()); + + seqan::Dna5String vd_insertion(nucl_inserter_p->GetVDInsertion()); + seqan::Dna5String dj_insertion(nucl_inserter_p->GetDJInsertion()); + + const auto& v_gene = (*v_db_p)[std::get<0>(genes_ind)]; + const auto& d_gene = (*d_db_p)[std::get<1>(genes_ind)]; + const auto& j_gene = (*j_db_p)[std::get<2>(genes_ind)]; + + annotation_utils::CDRLabeling cdr_labeling(v_cdr_db.GetLabelingByGene(v_gene)); + annotation_utils::CDRLabeling j_gene_cdr_labeling(j_cdr_db.GetLabelingByGene(j_gene)); + + if (not v_cdr_db.CDRLabelingIsEmpty(v_gene) and not j_cdr_db.CDRLabelingIsEmpty(j_gene)) { + long long cdr3_end = static_cast(v_gene.length()) + + -cleavage_v + + static_cast(seqan::length(vd_insertion)) + + -cleavage_d_left + + static_cast(d_gene.length()) + + -cleavage_d_right + + static_cast(seqan::length(dj_insertion)) + + -cleavage_j + + static_cast(j_gene_cdr_labeling.cdr3.end_pos); + + VERIFY(cdr3_end >= 0); + cdr_labeling.cdr3.end_pos = static_cast(cdr3_end); + } + + VDJMetaroot metaroot { v_db_p, d_db_p, j_db_p, + std::get<0>(genes_ind), std::get<1>(genes_ind), std::get<2>(genes_ind), + cdr_labeling, + cleavage_v, cleavage_d_left, cleavage_d_right, cleavage_j, + vd_insertion, dj_insertion }; + + if (not productivity_checker.IsProductive(metaroot)) { + metaroot.SetNonProductive(); + } + return AbstractMetarootCPtr(new VDJMetaroot(std::move(metaroot))); +} + +} // End namespace ig_simulator diff --git a/src/ig_simulator/base_repertoire/metaroot_creator/metaroot_creator.hpp b/src/ig_simulator/base_repertoire/metaroot_creator/metaroot_creator.hpp new file mode 100644 index 00000000..4867b3bb --- /dev/null +++ b/src/ig_simulator/base_repertoire/metaroot_creator/metaroot_creator.hpp @@ -0,0 +1,87 @@ +// +// Created by Andrew Bzikadze on 3/20/17. 
+// + +#pragma once + +#include "germline_utils/chain_type.hpp" +#include "base_repertoire/gene_chooser/abstract_gene_chooser.hpp" +#include "base_repertoire/gene_chooser/config_based_getter.hpp" +#include "base_repertoire/nucleotides_remover/abstract_nucleotides_remover.hpp" +#include "base_repertoire/nucleotides_remover/config_based_getter.hpp" +#include "base_repertoire/p_nucleotides_creator/abstract_nucleotides_creator.hpp" +#include "base_repertoire/p_nucleotides_creator/config_based_getter.hpp" +#include "base_repertoire/n_nucleotides_inserter/abstract_n_nucleotides_inserter.hpp" +#include "base_repertoire/n_nucleotides_inserter/config_based_getter.hpp" +#include "base_repertoire/metaroot/metaroot.hpp" +#include "germline_db_labeler.hpp" +#include "germline_db_labeling.hpp" +#include "cdr_config.hpp" +#include "base_repertoire/productivity_checker/productivity_checker.hpp" + +namespace ig_simulator { + +class AbstractMetarootCreator { +protected: + // TODO + // Databases are not declared `const` since cdr_labeler requires: see germline_db_labeler.hpp + // @code: DbCDRLabeling GermlineDbLabeler::ComputeLabeling(); + // is not declared const + germline_utils::CustomGeneDatabase * v_db_p; + germline_utils::CustomGeneDatabase * j_db_p; + + const double prob_cleavage_v; + const double prob_cleavage_j; + + const AbstractVDJGeneChooserCPtr gene_chooser_p; + const AbstractNucleotidesRemoverCPtr nucl_remover_p; + const AbstractPNucleotidesCreatorCPtr nucl_creator_p; + const AbstractNNucleotidesInserterCPtr nucl_inserter_p; + + const cdr_labeler::DbCDRLabeling v_cdr_db; + const cdr_labeler::DbCDRLabeling j_cdr_db; + + const ProductivityChecker productivity_checker; + +public: + AbstractMetarootCreator(const MetarootSimulationParams& config, + std::vector& db, + AbstractVDJGeneChooserCPtr&& gene_chooser); + + AbstractMetarootCreator() = delete; + AbstractMetarootCreator(const AbstractMetarootCreator&) = delete; + AbstractMetarootCreator(AbstractMetarootCreator&&) = 
delete; + AbstractMetarootCreator& operator=(const AbstractMetarootCreator&) = delete; + AbstractMetarootCreator& operator=(AbstractMetarootCreator&) = delete; + + virtual AbstractMetarootCPtr Createroot() const = 0; + virtual ~AbstractMetarootCreator() { } +}; +using AbstractMetarootCreatorCPtr = std::unique_ptr; + + +class VJMetarootCreator final : public AbstractMetarootCreator { +public: + + VJMetarootCreator(const MetarootSimulationParams& config, + std::vector& db); + + AbstractMetarootCPtr Createroot() const override; +}; + + +class VDJMetarootCreator final : public AbstractMetarootCreator { +private: + germline_utils::CustomGeneDatabase * d_db_p; + + const double prob_cleavage_d_left; + const double prob_cleavage_d_right; + +public: + VDJMetarootCreator(const MetarootSimulationParams& config, + std::vector& db); + + AbstractMetarootCPtr Createroot() const override; +}; + +} // End namespace ig_simulator diff --git a/src/ig_simulator/base_repertoire/multiplicity_creator/multiplicity_creator.cpp b/src/ig_simulator/base_repertoire/multiplicity_creator/multiplicity_creator.cpp new file mode 100644 index 00000000..9c4db30d --- /dev/null +++ b/src/ig_simulator/base_repertoire/multiplicity_creator/multiplicity_creator.cpp @@ -0,0 +1,16 @@ +// +// Created by Andrew Bzikadze on 3/27/17. 
+// + +#include "multiplicity_creator.hpp" + +namespace ig_simulator { + +AbstractMultiplicityCreatorPtr get_multiplicity_creator(const MultiplicityCreatorParams &config) { + if (config.method == MultiplicityCreatorMethod::Geometric) { + return AbstractMultiplicityCreatorPtr(new GeometricMultiplicityCreator(config.geometric_params)); + } + VERIFY(false); +} + +} // End namespace ig_simulator diff --git a/src/ig_simulator/base_repertoire/multiplicity_creator/multiplicity_creator.hpp b/src/ig_simulator/base_repertoire/multiplicity_creator/multiplicity_creator.hpp new file mode 100644 index 00000000..da969e0a --- /dev/null +++ b/src/ig_simulator/base_repertoire/multiplicity_creator/multiplicity_creator.hpp @@ -0,0 +1,55 @@ +// +// Created by Andrew Bzikadze on 3/27/17. +// + +#pragma once + +#include +#include + +#include "verify.hpp" + +#include "ig_simulator_utils.hpp" +#include "random_generator.hpp" +#include "ig_simulator_config.hpp" + +namespace ig_simulator { + +class AbstractMultiplicityCreator { +public: + AbstractMultiplicityCreator() = default; + AbstractMultiplicityCreator(const AbstractMultiplicityCreator&) = delete; + AbstractMultiplicityCreator(AbstractMultiplicityCreator&&) = delete; + AbstractMultiplicityCreator& operator=(const AbstractMultiplicityCreator&) = delete; + AbstractMultiplicityCreator& operator=(AbstractMultiplicityCreator&&) = delete; + + virtual size_t RandomMultiplicity() = 0; +}; + +using AbstractMultiplicityCreatorPtr = std::unique_ptr; + +class GeometricMultiplicityCreator final : public AbstractMultiplicityCreator { +private: + double lambda; + std::geometric_distribution distribution; + +public: + GeometricMultiplicityCreator(double lambda): + lambda(lambda), + distribution(check_numeric_positive(lambda)) + { } + + GeometricMultiplicityCreator(const MultiplicityCreatorParams::GeometricParams &config): + GeometricMultiplicityCreator(config.lambda) + { } + + size_t RandomMultiplicity() override { + return 
distribution(MTSingleton::GetInstance()) + 1; + } + + double Mean() const { return 1. / lambda + 1; } +}; + +AbstractMultiplicityCreatorPtr get_multiplicity_creator(const MultiplicityCreatorParams &config); + +} // End namespace ig_simulator diff --git a/src/ig_simulator/base_repertoire/n_nucleotides_inserter/abstract_n_nucleotides_inserter.cpp b/src/ig_simulator/base_repertoire/n_nucleotides_inserter/abstract_n_nucleotides_inserter.cpp new file mode 100644 index 00000000..5f4bbbbe --- /dev/null +++ b/src/ig_simulator/base_repertoire/n_nucleotides_inserter/abstract_n_nucleotides_inserter.cpp @@ -0,0 +1,5 @@ +// +// Created by Andrew Bzikadze on 3/20/17. +// + +#include "abstract_n_nucleotides_inserter.hpp" diff --git a/src/ig_simulator/base_repertoire/n_nucleotides_inserter/abstract_n_nucleotides_inserter.hpp b/src/ig_simulator/base_repertoire/n_nucleotides_inserter/abstract_n_nucleotides_inserter.hpp new file mode 100644 index 00000000..7221962b --- /dev/null +++ b/src/ig_simulator/base_repertoire/n_nucleotides_inserter/abstract_n_nucleotides_inserter.hpp @@ -0,0 +1,30 @@ +// +// Created by Andrew Bzikadze on 3/20/17. 
+//
+
+#pragma once
+
+// NOTE(review): the operands of the bare "#include" lines and the template argument of
+// "std::unique_ptr;" are missing in this text — TODO restore.
+#include
+#include
+#include
+
+namespace ig_simulator {
+
+// Interface of an insertion generator: one method per junction (VJ, VD, DJ), each returning
+// a seqan::Dna5String. Instances can be neither copied nor moved.
+class AbstractNNucleotidesInserter {
+public:
+    virtual seqan::Dna5String GetVJInsertion() const = 0;
+    virtual seqan::Dna5String GetVDInsertion() const = 0;
+    virtual seqan::Dna5String GetDJInsertion() const = 0;
+
+    AbstractNNucleotidesInserter() = default;
+    AbstractNNucleotidesInserter(const AbstractNNucleotidesInserter&) = delete;
+    AbstractNNucleotidesInserter(AbstractNNucleotidesInserter&&) = delete;
+    AbstractNNucleotidesInserter& operator=(const AbstractNNucleotidesInserter&) = delete;
+    AbstractNNucleotidesInserter& operator=(AbstractNNucleotidesInserter&&) = delete;
+
+    virtual ~AbstractNNucleotidesInserter() { }
+};
+
+using AbstractNNucleotidesInserterCPtr = std::unique_ptr;
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/n_nucleotides_inserter/config_based_getter.cpp b/src/ig_simulator/base_repertoire/n_nucleotides_inserter/config_based_getter.cpp
new file mode 100644
index 00000000..70aa1e70
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/n_nucleotides_inserter/config_based_getter.cpp
@@ -0,0 +1,20 @@
+//
+// Created by Andrew Bzikadze on 3/31/17.
+//
+
+#include "config_based_getter.hpp"
+#include "abstract_n_nucleotides_inserter.hpp"
+#include "uniform_n_nucleotides_inserter.hpp"
+
+
+namespace ig_simulator {
+
+// Factory: returns a UniformNNucleotidesInserter built from config.uniform_inserter_params
+// when config.method is Uniform. Any other method reaches VERIFY(false).
+// NOTE(review): there is no return after VERIFY(false) — presumably VERIFY aborts; confirm.
+AbstractNNucleotidesInserterCPtr get_nucleotides_inserter(const NNucleotidesInserterParams & config)
+{
+    if (config.method == NNucleotidesInserterMethod::Uniform)
+        return AbstractNNucleotidesInserterCPtr(new UniformNNucleotidesInserter(config.uniform_inserter_params));
+    VERIFY(false);
+}
+
+} // End namespace ig_simulator
+
diff --git a/src/ig_simulator/base_repertoire/n_nucleotides_inserter/config_based_getter.hpp b/src/ig_simulator/base_repertoire/n_nucleotides_inserter/config_based_getter.hpp
new file mode 100644
index 00000000..b24fbab6
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/n_nucleotides_inserter/config_based_getter.hpp
@@ -0,0 +1,14 @@
+//
+// Created by Andrew Bzikadze on 3/31/17.
+//
+
+#pragma once
+
+#include "abstract_n_nucleotides_inserter.hpp"
+#include "ig_simulator_config.hpp"
+
+namespace ig_simulator {
+
+// Creates the insertion generator selected by config.method.
+AbstractNNucleotidesInserterCPtr get_nucleotides_inserter(const NNucleotidesInserterParams & config);
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/n_nucleotides_inserter/uniform_n_nucleotides_inserter.cpp b/src/ig_simulator/base_repertoire/n_nucleotides_inserter/uniform_n_nucleotides_inserter.cpp
new file mode 100644
index 00000000..7a2fa60c
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/n_nucleotides_inserter/uniform_n_nucleotides_inserter.cpp
@@ -0,0 +1,29 @@
+//
+// Created by Andrew Bzikadze on 3/20/17.
+//
+
+// NOTE(review): the operand of the bare "#include" and the element type of "std::vector v_str"
+// are missing in this text — TODO restore.
+#include
+#include "uniform_n_nucleotides_inserter.hpp"
+#include "simulation_routines.hpp"
+
+using seqan::Dna5String;
+
+namespace ig_simulator {
+
+// Returns a string of `size` characters, each picked from "ACGT" via random_index(0, 3).
+// Presumably random_index draws uniformly from an inclusive range — verify against
+// simulation_routines.hpp.
+Dna5String UniformNNucleotidesInserter::RandDna5Str(size_t size) const {
+    auto RandomNucleotide = []() -> char {
+        return "ACGT"[random_index(0, 3)];
+    };
+
+    std::vector v_str(size);
+    for (auto & nucl : v_str) {
+        nucl = RandomNucleotide();
+    }
+    return Dna5String(std::string(v_str.begin(), v_str.end()));
+}
+
+// Each getter first draws a length with random_index(0, max_*_insertion) and then
+// generates a random string of that length.
+Dna5String UniformNNucleotidesInserter::GetVJInsertion() const { return RandDna5Str(random_index(0, max_vj_insertion)); }
+Dna5String UniformNNucleotidesInserter::GetVDInsertion() const { return RandDna5Str(random_index(0, max_vd_insertion)); }
+Dna5String UniformNNucleotidesInserter::GetDJInsertion() const { return RandDna5Str(random_index(0, max_dj_insertion)); }
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/n_nucleotides_inserter/uniform_n_nucleotides_inserter.hpp b/src/ig_simulator/base_repertoire/n_nucleotides_inserter/uniform_n_nucleotides_inserter.hpp
new file mode 100644
index 00000000..74620a98
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/n_nucleotides_inserter/uniform_n_nucleotides_inserter.hpp
@@ -0,0 +1,33 @@
+//
+// Created by Andrew Bzikadze on 3/20/17.
+//
+
+#pragma once
+
+#include "abstract_n_nucleotides_inserter.hpp"
+#include "ig_simulator_config.hpp"
+
+namespace ig_simulator {
+
+// Insertion generator whose three length limits are copied from the config at construction.
+class UniformNNucleotidesInserter final : public AbstractNNucleotidesInserter {
+private:
+    const size_t max_vj_insertion;
+    const size_t max_vd_insertion;
+    const size_t max_dj_insertion;
+
+    // Generates a random nucleotide string of the given length.
+    seqan::Dna5String RandDna5Str(size_t size) const;
+
+public:
+    // The config is taken by value; only its three max_*_insertion fields are read.
+    explicit UniformNNucleotidesInserter(
+            const NNucleotidesInserterParams::UniformInserterParams config):
+        max_vj_insertion(config.max_vj_insertion),
+        max_vd_insertion(config.max_vd_insertion),
+        max_dj_insertion(config.max_dj_insertion)
+    { }
+
+    seqan::Dna5String GetVJInsertion() const override;
+    seqan::Dna5String GetVDInsertion() const override;
+    seqan::Dna5String GetDJInsertion() const override;
+};
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/nucleotides_remover/abstract_nucleotides_remover.cpp b/src/ig_simulator/base_repertoire/nucleotides_remover/abstract_nucleotides_remover.cpp
new file mode 100644
index 00000000..0a3196cc
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/nucleotides_remover/abstract_nucleotides_remover.cpp
@@ -0,0 +1,5 @@
+//
+// Created by Andrew Bzikadze on 3/17/17.
+//
+
+#include "abstract_nucleotides_remover.hpp"
diff --git a/src/ig_simulator/base_repertoire/nucleotides_remover/abstract_nucleotides_remover.hpp b/src/ig_simulator/base_repertoire/nucleotides_remover/abstract_nucleotides_remover.hpp
new file mode 100644
index 00000000..d7db5921
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/nucleotides_remover/abstract_nucleotides_remover.hpp
@@ -0,0 +1,30 @@
+//
+// Created by Andrew Bzikadze on 3/17/17.
+//
+
+#pragma once
+
+// NOTE(review): the operands of the bare "#include" lines and the template argument of
+// "std::unique_ptr;" are missing in this text — TODO restore.
+#include
+#include
+
+namespace ig_simulator {
+
+// Interface of a removal-length generator: one method per gene end (V, D left, D right, J),
+// each returning a size_t. Instances can be neither copied nor moved.
+class AbstractNucleotidesRemover {
+public:
+    virtual size_t RemoveInVGene() const = 0;
+    virtual size_t RemoveInDGeneLeft() const = 0;
+    virtual size_t RemoveInDGeneRight() const = 0;
+    virtual size_t RemoveInJGene() const = 0;
+
+    AbstractNucleotidesRemover() = default;
+    AbstractNucleotidesRemover(const AbstractNucleotidesRemover&) = delete;
+    AbstractNucleotidesRemover(AbstractNucleotidesRemover&&) = delete;
+    AbstractNucleotidesRemover& operator=(const AbstractNucleotidesRemover&) = delete;
+    AbstractNucleotidesRemover& operator=(AbstractNucleotidesRemover&&) = delete;
+
+    virtual ~AbstractNucleotidesRemover() { }
+};
+
+using AbstractNucleotidesRemoverCPtr = std::unique_ptr;
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/nucleotides_remover/config_based_getter.cpp b/src/ig_simulator/base_repertoire/nucleotides_remover/config_based_getter.cpp
new file mode 100644
index 00000000..670de9bd
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/nucleotides_remover/config_based_getter.cpp
@@ -0,0 +1,19 @@
+//
+// Created by Andrew Bzikadze on 3/31/17.
+//
+
+#include "config_based_getter.hpp"
+#include "uniform_nucleotides_remover.hpp"
+
+
+namespace ig_simulator {
+
+// Factory: returns a UniformNucleotidesRemover built from config.uniform_remover_params
+// when config.method is Uniform. Any other method reaches VERIFY(false).
+// NOTE(review): there is no return after VERIFY(false) — presumably VERIFY aborts; confirm.
+AbstractNucleotidesRemoverCPtr get_nucleotides_remover(const NucleotidesRemoverParams & config)
+{
+    if (config.method == NucleotidesRemoverMethod::Uniform)
+        return AbstractNucleotidesRemoverCPtr(new UniformNucleotidesRemover(config.uniform_remover_params));
+    VERIFY(false);
+}
+
+} // End namespace ig_simulator
+
diff --git a/src/ig_simulator/base_repertoire/nucleotides_remover/config_based_getter.hpp b/src/ig_simulator/base_repertoire/nucleotides_remover/config_based_getter.hpp
new file mode 100644
index 00000000..5334ac31
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/nucleotides_remover/config_based_getter.hpp
@@ -0,0 +1,14 @@
+//
+// Created by Andrew Bzikadze on 3/31/17.
+//
+
+#pragma once
+
+#include "abstract_nucleotides_remover.hpp"
+#include "ig_simulator_config.hpp"
+
+namespace ig_simulator {
+
+// Creates the removal-length generator selected by config.method.
+AbstractNucleotidesRemoverCPtr get_nucleotides_remover(const NucleotidesRemoverParams & config);
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/nucleotides_remover/uniform_nucleotides_remover.cpp b/src/ig_simulator/base_repertoire/nucleotides_remover/uniform_nucleotides_remover.cpp
new file mode 100644
index 00000000..ea689c41
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/nucleotides_remover/uniform_nucleotides_remover.cpp
@@ -0,0 +1,15 @@
+//
+// Created by Andrew Bzikadze on 3/17/17.
+//
+
+#include "uniform_nucleotides_remover.hpp"
+#include "simulation_routines.hpp"
+
+namespace ig_simulator {
+
+// Each method returns random_index(0, <matching max_remove_* limit>).
+// Presumably random_index draws uniformly from an inclusive range — verify against
+// simulation_routines.hpp.
+size_t UniformNucleotidesRemover::RemoveInVGene() const { return random_index(0, max_remove_v_gene); }
+size_t UniformNucleotidesRemover::RemoveInDGeneLeft() const { return random_index(0, max_remove_d_gene_left); }
+size_t UniformNucleotidesRemover::RemoveInDGeneRight() const { return random_index(0, max_remove_d_gene_right); }
+size_t UniformNucleotidesRemover::RemoveInJGene() const { return random_index(0, max_remove_j_gene); }
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/nucleotides_remover/uniform_nucleotides_remover.hpp b/src/ig_simulator/base_repertoire/nucleotides_remover/uniform_nucleotides_remover.hpp
new file mode 100644
index 00000000..c7e6e4e8
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/nucleotides_remover/uniform_nucleotides_remover.hpp
@@ -0,0 +1,38 @@
+//
+// Created by Andrew Bzikadze on 3/17/17.
+//
+
+#pragma once
+
+// NOTE(review): the operand of the bare "#include" is missing in this text — TODO restore.
+#include
+#include "abstract_nucleotides_remover.hpp"
+#include "ig_simulator_config.hpp"
+
+namespace ig_simulator {
+
+// Removal-length generator whose four limits are copied from the config at construction.
+class UniformNucleotidesRemover final : public AbstractNucleotidesRemover {
+private:
+    const size_t max_remove_v_gene;
+    const size_t max_remove_d_gene_left;
+    const size_t max_remove_d_gene_right;
+    const size_t max_remove_j_gene;
+
+public:
+    // The config is taken by value; only its four max_remove_* fields are read.
+    explicit UniformNucleotidesRemover(
+            const NucleotidesRemoverParams::UniformRemoverParams config) :
+        AbstractNucleotidesRemover(),
+        max_remove_v_gene(config.max_remove_v_gene),
+        max_remove_d_gene_left(config.max_remove_d_gene_left),
+        max_remove_d_gene_right(config.max_remove_d_gene_right),
+        max_remove_j_gene(config.max_remove_j_gene)
+    { }
+
+    virtual size_t RemoveInVGene() const override;
+    virtual size_t RemoveInDGeneLeft() const override;
+    virtual size_t RemoveInDGeneRight() const override;
+    virtual size_t RemoveInJGene() const override;
+
+    virtual ~UniformNucleotidesRemover() { }
+};
+
+} // End namespace ig_simulator
\ No newline at end of file
diff --git a/src/ig_simulator/base_repertoire/p_nucleotides_creator/abstract_nucleotides_creator.cpp b/src/ig_simulator/base_repertoire/p_nucleotides_creator/abstract_nucleotides_creator.cpp
new file mode 100644
index 00000000..a979c5b4
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/p_nucleotides_creator/abstract_nucleotides_creator.cpp
@@ -0,0 +1,6 @@
+//
+// Created by Andrew Bzikadze on 3/20/17.
+//
+
+#include "abstract_nucleotides_creator.hpp"
+
diff --git a/src/ig_simulator/base_repertoire/p_nucleotides_creator/abstract_nucleotides_creator.hpp b/src/ig_simulator/base_repertoire/p_nucleotides_creator/abstract_nucleotides_creator.hpp
new file mode 100644
index 00000000..386ae038
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/p_nucleotides_creator/abstract_nucleotides_creator.hpp
@@ -0,0 +1,30 @@
+//
+// Created by Andrew Bzikadze on 3/20/17.
+//
+
+#pragma once
+
+// NOTE(review): the operands of the bare "#include" lines and the template argument of
+// "std::unique_ptr;" are missing in this text — TODO restore.
+#include
+#include
+
+namespace ig_simulator {
+
+// Interface of a creation-length generator: one method per gene end (V, D left, D right, J),
+// each returning a size_t. Instances can be neither copied nor moved.
+class AbstractPNucleotidesCreator {
+public:
+    virtual size_t CreateInVGene() const = 0;
+    virtual size_t CreateInDGeneLeft() const = 0;
+    virtual size_t CreateInDGeneRight() const = 0;
+    virtual size_t CreateInJGene() const = 0;
+
+    AbstractPNucleotidesCreator() = default;
+    AbstractPNucleotidesCreator(const AbstractPNucleotidesCreator&) = delete;
+    AbstractPNucleotidesCreator(AbstractPNucleotidesCreator&&) = delete;
+    AbstractPNucleotidesCreator& operator=(const AbstractPNucleotidesCreator&) = delete;
+    AbstractPNucleotidesCreator& operator=(AbstractPNucleotidesCreator&&) = delete;
+
+    virtual ~AbstractPNucleotidesCreator() { }
+};
+
+using AbstractPNucleotidesCreatorCPtr = std::unique_ptr;
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/p_nucleotides_creator/config_based_getter.cpp b/src/ig_simulator/base_repertoire/p_nucleotides_creator/config_based_getter.cpp
new file mode 100644
index 00000000..04938e45
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/p_nucleotides_creator/config_based_getter.cpp
@@ -0,0 +1,19 @@
+//
+// Created by Andrew Bzikadze on 3/31/17.
+//
+
+#include "config_based_getter.hpp"
+#include "uniform_nucleotides_creator.hpp"
+
+
+namespace ig_simulator {
+
+// Factory: returns a UniformPNucleotidesCreator built from config.uniform_creator_params
+// when config.method is Uniform. Any other method reaches VERIFY(false).
+// NOTE(review): there is no return after VERIFY(false) — presumably VERIFY aborts; confirm.
+AbstractPNucleotidesCreatorCPtr get_nucleotides_creator(const PNucleotidesCreatorParams &config)
+{
+    if (config.method == PNucleotidesCreatorMethod::Uniform)
+        return AbstractPNucleotidesCreatorCPtr(new UniformPNucleotidesCreator(config.uniform_creator_params));
+    VERIFY(false);
+}
+
+} // End namespace ig_simulator
+
diff --git a/src/ig_simulator/base_repertoire/p_nucleotides_creator/config_based_getter.hpp b/src/ig_simulator/base_repertoire/p_nucleotides_creator/config_based_getter.hpp
new file mode 100644
index 00000000..4115dbc7
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/p_nucleotides_creator/config_based_getter.hpp
@@ -0,0 +1,14 @@
+//
+// Created by Andrew Bzikadze on 3/31/17.
+//
+
+#pragma once
+
+#include "abstract_nucleotides_creator.hpp"
+#include "ig_simulator_config.hpp"
+
+namespace ig_simulator {
+
+// Creates the creation-length generator selected by config.method.
+AbstractPNucleotidesCreatorCPtr get_nucleotides_creator(const PNucleotidesCreatorParams &config);
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/p_nucleotides_creator/uniform_nucleotides_creator.cpp b/src/ig_simulator/base_repertoire/p_nucleotides_creator/uniform_nucleotides_creator.cpp
new file mode 100644
index 00000000..e60eb0b4
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/p_nucleotides_creator/uniform_nucleotides_creator.cpp
@@ -0,0 +1,15 @@
+//
+// Created by Andrew Bzikadze on 3/20/17.
+//
+
+#include "uniform_nucleotides_creator.hpp"
+#include "simulation_routines.hpp"
+
+namespace ig_simulator {
+
+// Each method returns random_index(0, <matching max_create_* limit>).
+// Presumably random_index draws uniformly from an inclusive range — verify against
+// simulation_routines.hpp.
+size_t UniformPNucleotidesCreator::CreateInVGene() const { return random_index(0, max_create_v_gene); }
+size_t UniformPNucleotidesCreator::CreateInDGeneLeft() const { return random_index(0, max_create_d_gene_left); }
+size_t UniformPNucleotidesCreator::CreateInDGeneRight() const { return random_index(0, max_create_d_gene_right); }
+size_t UniformPNucleotidesCreator::CreateInJGene() const { return random_index(0, max_create_j_gene); }
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/p_nucleotides_creator/uniform_nucleotides_creator.hpp b/src/ig_simulator/base_repertoire/p_nucleotides_creator/uniform_nucleotides_creator.hpp
new file mode 100644
index 00000000..ac84fa1f
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/p_nucleotides_creator/uniform_nucleotides_creator.hpp
@@ -0,0 +1,34 @@
+//
+// Created by Andrew Bzikadze on 3/20/17.
+//
+
+#pragma once
+
+#include "abstract_nucleotides_creator.hpp"
+#include "ig_simulator_config.hpp"
+
+namespace ig_simulator {
+
+// Creation-length generator whose four limits are copied from the config at construction.
+class UniformPNucleotidesCreator final : public AbstractPNucleotidesCreator {
+private:
+    size_t max_create_v_gene;
+    size_t max_create_d_gene_left;
+    size_t max_create_d_gene_right;
+    size_t max_create_j_gene;
+
+public:
+    // The config is taken by value; only its four max_create_* fields are read.
+    explicit UniformPNucleotidesCreator(
+            const PNucleotidesCreatorParams::UniformCreatorParams config) :
+        max_create_v_gene(config.max_create_v_gene),
+        max_create_d_gene_left(config.max_create_d_gene_left),
+        max_create_d_gene_right(config.max_create_d_gene_right),
+        max_create_j_gene(config.max_create_j_gene)
+    { }
+
+    virtual size_t CreateInVGene() const override;
+    virtual size_t CreateInDGeneLeft() const override;
+    virtual size_t CreateInDGeneRight() const override;
+    virtual size_t CreateInJGene() const override;
+};
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/base_repertoire/productivity_checker/productivity_checker.cpp b/src/ig_simulator/base_repertoire/productivity_checker/productivity_checker.cpp
new file mode 100644
index 00000000..acd521cd
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/productivity_checker/productivity_checker.cpp
@@ -0,0 +1,17 @@
+//
+// Created by Andrew Bzikadze on 3/27/17.
+//
+
+#include "productivity_checker.hpp"
+
+namespace ig_simulator {
+
+// Returns false when the root has an empty CDR labeling. Otherwise wraps the root sequence
+// in a core::Read, computes its amino-acid annotation with aa_calculator and returns true
+// only if that annotation has no stop codon and is in frame.
+bool ProductivityChecker::IsProductive(const AbstractMetaroot& root) const {
+    if (root.CDRLabeling().Empty())
+        return false;
+    core::Read read("", root.Sequence(), 0);
+    auto aa = aa_calculator->ComputeAminoAcidAnnotation(read, root.CDRLabeling());
+    return not aa.HasStopCodon() and aa.InFrame();
+}
+
+} // End namespace ig_simulator
\ No newline at end of file
diff --git a/src/ig_simulator/base_repertoire/productivity_checker/productivity_checker.hpp b/src/ig_simulator/base_repertoire/productivity_checker/productivity_checker.hpp
new file mode 100644
index 00000000..10d997a3
--- /dev/null
+++ b/src/ig_simulator/base_repertoire/productivity_checker/productivity_checker.hpp
@@ -0,0 +1,34 @@
+//
+// Created by Andrew Bzikadze on 3/27/17.
+//
+
+#pragma once
+
+#include "base_repertoire/metaroot/metaroot.hpp"
+#include "annotation_utils/aa_annotation/aa_calculator.hpp"
+
+namespace ig_simulator {
+
+// Decides whether a metaroot is productive using an amino-acid calculator that it owns.
+// Instances can be neither copied nor moved.
+class ProductivityChecker {
+private:
+    const annotation_utils::BaseAACalculatorPtr aa_calculator;
+
+public:
+    // When no calculator is supplied, a new annotation_utils::SimpleAACalculator is used.
+    explicit ProductivityChecker(annotation_utils::BaseAACalculatorPtr aa_calculator =
+                                 annotation_utils::BaseAACalculatorPtr(new annotation_utils::SimpleAACalculator())):
+        aa_calculator(std::move(aa_calculator))
+    { }
+
+    bool IsProductive(const AbstractMetaroot& root) const;
+
+    // Pointer overload: passes the pointer through check_pointer() and forwards to the
+    // reference overload.
+    bool IsProductive(const AbstractMetarootCPtr& root) const {
+        return IsProductive(*check_pointer(root));
+    }
+
+    ProductivityChecker(const ProductivityChecker&) = delete;
+    ProductivityChecker(ProductivityChecker&&) = delete;
+    ProductivityChecker& operator=(const ProductivityChecker&) = delete;
+    ProductivityChecker& operator=(ProductivityChecker&&) = delete;
+};
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/fast_stop_codon_checker/fast_stop_codon_checker.cpp b/src/ig_simulator/clonal_trees/fast_stop_codon_checker/fast_stop_codon_checker.cpp
new file mode 100644
index 00000000..7b06dedf
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/fast_stop_codon_checker/fast_stop_codon_checker.cpp
@@ -0,0 +1,11 @@
+//
+// Created by Andrew Bzikadze on 4/25/17.
+//
+
+#include "fast_stop_codon_checker.hpp"
+
+namespace ig_simulator {
+
+// Out-of-class definition of the constexpr static data member declared in the header.
+// NOTE(review): the template arguments of "std::array" are missing in this text — TODO restore.
+constexpr std::array FastStopCodonChecker::stop_codons_hashes;
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/fast_stop_codon_checker/fast_stop_codon_checker.hpp b/src/ig_simulator/clonal_trees/fast_stop_codon_checker/fast_stop_codon_checker.hpp
new file mode 100644
index 00000000..68bdefac
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/fast_stop_codon_checker/fast_stop_codon_checker.hpp
@@ -0,0 +1,81 @@
+//
+// Created by Andrew Bzikadze on 4/25/17.
+// + +#pragma once + +#include +#include +#include +#include + +namespace ig_simulator { + +class FastStopCodonCheckerDetails { +friend class FastStopCodonChecker; +private: + constexpr static unsigned get_hash(const char *s, + const unsigned hash_base, + const unsigned hash_base_sq) { + return s[0] + hash_base * s[1] + hash_base_sq * s[2]; + } + + static unsigned get_hash(std::string &&s, + const unsigned hash_base, + const unsigned hash_base_sq) { + return get_hash(s.c_str(), hash_base, hash_base_sq); + } +}; + +class FastStopCodonChecker { +private: + constexpr const static unsigned hash_base { 10 }; + constexpr const static unsigned hash_base_sq { hash_base * hash_base }; + + /** + * I have to use the following hack and refrain from using std::array for stop_codons because + * on OSX El Capitan neigher std::array::operator[] nor std::array std::get + * are not declared constexpr. + * The following code should be used instead in the future: + * @code + * constexpr const static std::array stop_codons { "TAG", "TAA", "TGA" }; + * constexpr const static std::array stop_codons_hashes + * {{ + * sc_checker_details::get_hash(std::get<0>(stop_codons), hash_base, hash_base_sq), + * sc_checker_details::get_hash(std::get<1>(stop_codons), hash_base, hash_base_sq), + * sc_checker_details::get_hash(std::get<2>(stop_codons), hash_base, hash_base_sq) + * }}; + */ + constexpr const static std::array stop_codons_hashes + {{ + FastStopCodonCheckerDetails::get_hash("TAG", hash_base, hash_base_sq), + FastStopCodonCheckerDetails::get_hash("TAA", hash_base, hash_base_sq), + FastStopCodonCheckerDetails::get_hash("TGA", hash_base, hash_base_sq) + }}; + +public: + bool static HasStopCodon(const std::string& str, size_t orf) { + for(size_t i = orf; i + 2 < str.length(); i += 3) { + size_t hash = FastStopCodonCheckerDetails::get_hash(str.substr(i, 3), + hash_base, hash_base_sq); + if (std::find(stop_codons_hashes.begin(), stop_codons_hashes.end(), hash) + != stop_codons_hashes.end()) + { 
+ return true; + } + } + return false; + } + + bool static HasStopCodon(const std::string& str, const annotation_utils::CDRLabeling& labeling) { + return HasStopCodon(str, labeling.cdr1.start_pos % 3); + } + + FastStopCodonChecker() = delete; + FastStopCodonChecker(const FastStopCodonChecker&) = delete; + FastStopCodonChecker(FastStopCodonChecker&&) = delete; + FastStopCodonChecker& operator=(const FastStopCodonChecker&) = delete; + FastStopCodonChecker& operator=(FastStopCodonChecker&&) = delete; +}; + +} // End namespace ig_simulator diff --git a/src/ig_simulator/clonal_trees/forest/forest.cpp b/src/ig_simulator/clonal_trees/forest/forest.cpp new file mode 100644 index 00000000..1bbb1fc2 --- /dev/null +++ b/src/ig_simulator/clonal_trees/forest/forest.cpp @@ -0,0 +1,21 @@ +// +// Created by Andrew Bzikadze on 4/9/17. +// + +#include "forest.hpp" + +namespace ig_simulator { + +std::ostream& operator<<(std::ostream& out, const Forest& forest) { + for(size_t i = 0; i < forest.trees.size(); ++i) { + const auto& tree = forest.trees[i]; + out << "===============================================\n"; + out << "Tree # " << i + 1 << " / " << forest.trees.size() << '\n'; + out << "===============================================\n"; + out << tree; + out << '\n'; + } + return out; +} + +} // End namespace ig_simulator diff --git a/src/ig_simulator/clonal_trees/forest/forest.hpp b/src/ig_simulator/clonal_trees/forest/forest.hpp new file mode 100644 index 00000000..8636fbbe --- /dev/null +++ b/src/ig_simulator/clonal_trees/forest/forest.hpp @@ -0,0 +1,42 @@ +// +// Created by Andrew Bzikadze on 4/9/17. 
+// + +#pragma once + +#include "clonal_trees/tree/tree.hpp" +#include "base_repertoire/metaroot_cluster/metaroot_cluster.hpp" + +namespace ig_simulator { + +class Forest { +private: + const MetarootCluster* metaroot_cluster; + const std::vector trees; + +public: + Forest(const MetarootCluster* const metaroot_cluster, + std::vector&& trees = {}) noexcept: + metaroot_cluster(metaroot_cluster), + trees(trees) + { } + + Forest(const Forest&) = default; + Forest(Forest&&) = default; + + Forest& operator=(const Forest&) = default; + Forest& operator=(Forest&&) = default; + + const MetarootCluster* GetMetarootCluster() const { return metaroot_cluster; } + const std::vector& Trees() const { return trees; } + + size_t Size() const { return trees.size(); } + + friend std::ostream& operator<<(std::ostream&, const Forest&); +}; + +std::ostream& operator<<(std::ostream& out, const Forest&); + +using ForestStorage = std::vector; + +} // End namespace ig_simulator diff --git a/src/ig_simulator/clonal_trees/tree/node.cpp b/src/ig_simulator/clonal_trees/tree/node.cpp new file mode 100644 index 00000000..9ee7d08f --- /dev/null +++ b/src/ig_simulator/clonal_trees/tree/node.cpp @@ -0,0 +1,5 @@ +// +// Created by Andrew Bzikadze on 4/9/17. +// + +#include "node.hpp" diff --git a/src/ig_simulator/clonal_trees/tree/node.hpp b/src/ig_simulator/clonal_trees/tree/node.hpp new file mode 100644 index 00000000..03d4e81c --- /dev/null +++ b/src/ig_simulator/clonal_trees/tree/node.hpp @@ -0,0 +1,55 @@ +// +// Created by Andrew Bzikadze on 4/9/17. 
+//
+
+#pragma once
+
+// NOTE(review): the operands of the four bare "#include" lines and the element type of
+// SHM_Vector ("std::vector>") are missing in this text — TODO restore.
+#include
+#include
+#include
+#include
+
+#include "seqan/basic.h"
+
+
+namespace ig_simulator {
+
+// One node of a clonal tree: the index of its parent, the SHMs on the edge from the parent,
+// and two mutable flags (included / productive).
+//
+// NOTE(review): parent_ind and shms are const members, so the defaulted assignment operators
+// are defined as deleted — confirm this is intended.
+class Node {
+public:
+    using SHM_Vector = std::vector>;
+
+private:
+    const size_t parent_ind;
+
+    // We store only SHMs "on the edge" from the parent
+    const SHM_Vector shms;
+    bool included;
+    bool productive;
+
+public:
+    // Defaults: parent index size_t(-1), no SHMs, included and productive.
+    Node(size_t parent_ind = size_t(-1),
+         SHM_Vector&& shms = {},
+         bool included = true,
+         bool productive = true):
+        parent_ind(parent_ind),
+        shms(std::move(shms)),
+        included(included),
+        productive(productive)
+    { }
+
+    Node(const Node&) = default;
+    Node(Node&&) = default;
+    Node& operator=(const Node&) = default;
+    Node& operator=(Node&&) = default;
+
+    size_t ParentInd() const { return parent_ind; }
+    const SHM_Vector& SHMs() const { return shms; }
+
+    // One-way switches: there is no method that sets a flag back to true.
+    void Exclude() { included = false; }
+    void MakeNonProductive() { productive = false; }
+
+    bool IsIncluded() const { return included; }
+    bool IsProductive() const { return productive; }
+};
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/tree/tree.cpp b/src/ig_simulator/clonal_trees/tree/tree.cpp
new file mode 100644
index 00000000..fe3310b5
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree/tree.cpp
@@ -0,0 +1,46 @@
+//
+// Created by Andrew Bzikadze on 4/9/17.
+//
+
+#include "tree.hpp"
+#include "verify.hpp"
+#include "annotation_utils/aa_annotation/aa_calculator.hpp"
+
+namespace ig_simulator {
+
+// Writes the tree as a "digraph G { ... }" text block. Node 0 is written first; every other
+// node i gets a node statement followed by an edge statement "parent -> i" whose minlen is the
+// number of SHMs on that edge, with the SHMs listed in a trailing "//" comment.
+// Shape is "circle" for productive and "box" otherwise; fill colour is "cyan" for included
+// and "magenta" otherwise. Requires at least one node (VERIFY).
+std::ostream& operator<<(std::ostream& out, const Tree& tree) {
+    VERIFY(tree.nodes.size() >= 1);
+
+    out << "digraph G {\n";
+    // NOTE(review): the shape of node 0 is taken from tree.Metaroot()->IsProductive(), while
+    // the trailing comment uses tree.nodes.front().IsProductive() — confirm both always agree.
+    out << '\t' << 0 << " [shape = " << (tree.Metaroot()->IsProductive() ? "circle" : "box") << "," <<
+        "fillcolor = " << (tree.nodes.front().IsIncluded() ? "cyan" : "magenta") << "," <<
+        "style = filled,size=1]; // " <<
+        '(' << (tree.nodes.front().IsIncluded() ? "included" : "excluded") << ')' << ' ' <<
+        '(' << (tree.nodes.front().IsProductive() ? "productive" : "non-productive") << ')' << '\n';
+
+    for (size_t i = 1; i < tree.nodes.size(); ++i) {
+        const auto& node = tree.nodes[i];
+        const auto& shms = node.SHMs();
+        out << '\t' << i << " [shape = " << (node.IsProductive() ? "circle" : "box" ) << "," <<
+            "fillcolor = " << (node.IsIncluded() ? "cyan" : "magenta") << "," <<
+            "style = filled,size=1]; // " <<
+            '(' << (node.IsIncluded() ? "included" : "excluded") << ')' << ' ' <<
+            '(' << (node.IsProductive() ? "productive" : "non-productive") << ')' << '\n';
+        out << '\t' << node.ParentInd() << " -> " << i << "[minlen = " << std::to_string(shms.size()) << "]; // ";
+        out << "total shms: " << shms.size() << " ";
+        // Each SHM is printed from its three tuple elements as "(at <0> from <1> to <2>)".
+        for(const auto& shm : shms) {
+            out << "(at " << std::get<0>(shm) <<
+                " from " << std::get<1>(shm) <<
+                " to " << std::get<2>(shm) << ')' << ' ';
+        }
+        out << '\n';
+
+        // annotation_utils::SimpleAACalculator aa_calculator;
+        // core::Read read("", tree.Sequences()[node.ParentInd()], 0);
+        // VERIFY(not aa_calculator.ComputeAminoAcidAnnotation(read, tree.Metaroot()->CDRLabeling()).HasStopCodon());
+    }
+    out << "}\n";
+    return out;
+}
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/tree/tree.hpp b/src/ig_simulator/clonal_trees/tree/tree.hpp
new file mode 100644
index 00000000..491c1f3f
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree/tree.hpp
@@ -0,0 +1,46 @@
+//
+// Created by Andrew Bzikadze on 4/9/17.
+//
+
+#pragma once
+
+#include "node.hpp"
+#include "base_repertoire/metaroot/metaroot.hpp"
+
+namespace ig_simulator {
+
+// A clonal tree: the metaroot it was grown from, its nodes and one sequence
+// per node (sequences[i] is exported together with nodes[i]'s included flag).
+// The metaroot pointer is stored as given and never deleted by this class.
+class Tree {
+    const AbstractMetaroot* metaroot;
+    const std::vector nodes;
+    const std::vector sequences;
+
+public:
+    // @param metaroot -- metaroot the tree belongs to
+    // @param nodes -- tree nodes (moved from)
+    // @param sequences -- node sequences (moved from)
+    Tree(const AbstractMetaroot* const metaroot,
+         std::vector&& nodes = {},
+         std::vector&& sequences = {}) noexcept:
+        metaroot(metaroot),
+        nodes(std::move(nodes)),
+        sequences(std::move(sequences))
+    { }
+
+    Tree(const Tree&) = default;
+    Tree(Tree&&) = default;
+
+    // NOTE: nodes and sequences are const members, so these defaulted
+    // assignment operators are implicitly defined as deleted.
+    Tree& operator=(const Tree&) = default;
+    Tree& operator=(Tree&&) = default;
+
+    // Number of nodes in the tree.
+    size_t Size() const { return nodes.size(); }
+    const AbstractMetaroot* Metaroot() const { return metaroot; }
+
+    const std::vector& Sequences() const { return sequences; }
+
+    // No bounds check: node_ind must be smaller than Size().
+    bool IsNodeIncluded(size_t node_ind) const {
+        return nodes[node_ind].IsIncluded();
+    }
+
+    friend std::ostream& operator<<(std::ostream& out, const Tree& tree);
+};
+
+std::ostream& operator<<(std::ostream& out, const Tree& tree);
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/tree_creator/cartesian_tree.cpp b/src/ig_simulator/clonal_trees/tree_creator/cartesian_tree.cpp
new file mode 100644
index 00000000..789c0aea
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/cartesian_tree.cpp
@@ -0,0 +1,5 @@
+//
+// Created by Andrew Bzikadze on 4/10/17.
+//
+
+#include "cartesian_tree.hpp"
diff --git a/src/ig_simulator/clonal_trees/tree_creator/cartesian_tree.hpp b/src/ig_simulator/clonal_trees/tree_creator/cartesian_tree.hpp
new file mode 100644
index 00000000..24b1d646
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/cartesian_tree.hpp
@@ -0,0 +1,249 @@
+//
+// Created by Andrew Bzikadze on 4/10/17.
+//
+
+#pragma once
+
+#include
+#include
+#include "verify.hpp"
+#include "simulation_routines.hpp"
+
+namespace ig_simulator {
+
+// Treap (cartesian tree): a binary search tree on `key` that is at the same
+// time a min-heap on `prior`. Every node additionally stores `sum`, the total
+// `freq` of its subtree, which lets LowerBound() pick a key by cumulative
+// frequency while walking a single root-to-node path.
+template
+class Treap {
+private:
+    struct TreapNode;
+    // TODO change to unique_ptr
+    // using TreapNodePtr = std::shared_ptr;
+    using TreapNodePtr = TreapNode*;
+
+    struct TreapNode {
+        // @field key -- index in Tree (unique)
+        // @field freq -- the "rational" probability in discrete distribution
+        // @field sum -- sum of freq in the subtree possessing @this as a root
+        // @field prior -- normally random priority for the heap
+        KeyType key;
+        FreqType freq, sum;
+        PriorType prior;
+
+        TreapNodePtr left, right;
+
+        TreapNode(KeyType key, FreqType freq,
+                  PriorType prior,
+                  TreapNodePtr left = nullptr, TreapNodePtr right = nullptr) :
+            key(key), freq(freq), sum(freq),
+            prior(prior),
+            left(left), right(right)
+        { }
+
+        // A node owns both of its subtrees.
+        ~TreapNode() {
+            delete left;
+            delete right;
+        }
+
+        // Subtree sum of a possibly empty subtree.
+        static FreqType Sum(const TreapNodePtr &t) {
+            if (t != nullptr)
+                return t->sum;
+            return 0;
+        }
+
+        // Recomputes t->sum from the children and t->freq.
+        static void Upd(TreapNodePtr &t) {
+            if (t != nullptr)
+                t->sum = Sum(t->left) + Sum(t->right) + t->freq;
+        }
+    };
+
+    TreapNodePtr root;
+    size_t treap_size;
+
+private:
+    // Merges treaps l and r (every key of l is expected to be smaller than
+    // every key of r) and stores the resulting root in *pt.
+    static void Merge(TreapNodePtr *pt, TreapNodePtr &l, TreapNodePtr &r) {
+        if (l == nullptr)
+            *pt = r;
+        else if (r == nullptr)
+            *pt = l;
+        else if (l->prior < r->prior) {
+            Merge(&l->right, l->right, r);
+            *pt = l;
+        } else {
+            Merge(&r->left, l, r->left);
+            *pt = r;
+        }
+        TreapNode::Upd(*pt);
+        // Check(*pt);
+    }
+
+    // Splits t into *l (keys < key) and *r (keys >= key).
+    static void Split(TreapNodePtr t, KeyType key, TreapNodePtr *l, TreapNodePtr *r) {
+        if (t == nullptr) {
+            *l = nullptr;
+            *r = nullptr;
+        }
+        else if (t->key < key) {
+            Split(t->right, key, &t->right, r);
+            *l = t;
+            TreapNode::Upd(*l);
+        } else {
+            Split(t->left, key, l, &t->left);
+            *r = t;
+            TreapNode::Upd(*r);
+        }
+        // Check(t);
+    }
+
+    // Verifies the `sum` invariant in the whole subtree of t.
+    static void Check(TreapNodePtr t) {
+        if (t == nullptr)
+            return;
+        VERIFY(t->sum == TreapNode::Sum(t->left) + TreapNode::Sum(t->right) + t->freq);
+        Check(t->left);
+        Check(t->right);
+    }
+
+public:
+    Treap(): root(nullptr), treap_size(0) { }
+    ~Treap() { delete root; }
+
+    // The nodes are owned through raw pointers, so a member-wise copy would
+    // make two treaps delete the same nodes.
+    Treap(const Treap&) = delete;
+    Treap& operator=(const Treap&) = delete;
+
+    // Inserts a new key (must not be present yet) with the given frequency.
+    void Insert(KeyType key, FreqType freq, PriorType prior = random_index()) {
+        VERIFY(not Contains(key));
+        TreapNodePtr * pt = &root;
+        // Walk down while the heap order allows it; every node passed on the
+        // way becomes an ancestor of the new node, so its sum grows by freq.
+        while (*pt and (*pt)->prior < prior) {
+            (*pt)->sum += freq;
+            if (key < (*pt)->key)
+                pt = &(*pt)->left;
+            else
+                pt = &(*pt)->right;
+        }
+        TreapNodePtr l, r;
+        Split(*pt, key, &l, &r);
+        *pt = TreapNodePtr(new TreapNode(key, freq, prior, l, r));
+        TreapNode::Upd(*pt);
+        treap_size++;
+        // Check();
+    }
+
+    // Removes `key`, whose stored frequency must be equal to `freq`.
+    void Erase(KeyType key, FreqType freq) {
+        TreapNodePtr * pt = &root;
+        // Sums of all proper ancestors shrink by freq.
+        while (*pt != nullptr and (*pt)->key != key) {
+            (*pt)->sum -= freq;
+            if (key < (*pt)->key)
+                pt = &(*pt)->left;
+            else
+                pt = &(*pt)->right;
+        }
+        // Fail loudly instead of dereferencing a null pointer.
+        VERIFY_MSG(*pt != nullptr, "Erase: key is not present in the treap");
+        VERIFY_MSG((*pt)->freq == freq, (*pt)->freq << " " << freq);
+        TreapNodePtr p;
+        Merge(&p, (*pt)->left, (*pt)->right);
+        // Detach the children so that ~TreapNode does not delete them.
+        (*pt)->left = nullptr;
+        (*pt)->right = nullptr;
+        // TODO Fix bug with not setting to nullptr pointer of parent of *pt if *pt has no children
+        delete *pt;
+        *pt = p;
+        treap_size--;
+        // Check();
+    }
+
+    void Erase(KeyType key) {
+        FreqType freq = GetFreq(key);
+        Erase(key, freq);
+    }
+
+    // NOTE(review): this walk counts every node as 1 (it uses Sum(right) + 1,
+    // not freq) and has no null check; confirm the intended use before
+    // relying on it.
+    KeyType FindBySum(FreqType sum) const {
+        TreapNodePtr t = root;
+        FreqType temp;
+        while((temp = TreapNode::Sum(t->right) + 1) != sum) {
+            if (temp > sum)
+                t = t->right;
+            else {
+                t = t->left;
+                sum -= temp;
+            }
+        }
+        return t->key;
+    }
+
+    bool Contains(KeyType key) const {
+        TreapNodePtr t = root;
+        while (t != nullptr) {
+            if (t->key == key) {
+                return true;
+            }
+            if (t->key > key) {
+                t = t->left;
+            } else {
+                t = t->right;
+            }
+        }
+        return false;
+    }
+
+    // Returns the frequency stored for `key`; the key must be present.
+    FreqType GetFreq(KeyType key) const {
+        VERIFY(Contains(key));
+        TreapNodePtr t = root;
+        while (t != nullptr) {
+            if (t->key == key) {
+                return t->freq;
+            }
+            if (t->key > key) {
+                t = t->left;
+            } else {
+                t = t->right;
+            }
+        }
+        VERIFY(false);
+    }
+
+    // Replaces the frequency of `key` (currently old_freq) by new_freq and
+    // fixes the sums on the path from the root.
+    void SetFreq(KeyType key, FreqType old_freq, FreqType new_freq) {
+        TreapNodePtr t = root;
+        while(t != nullptr and t->key != key) {
+            // if FreqType is unsigned `new_freq - old_freq` is dangerous
+            // std::cout << t->sum << " " << old_freq << "\n";
+            VERIFY_MSG(t->sum >= old_freq,
+                       std::string("t->sum = ") + std::to_string(t->sum) +
+                       ", old_freq = " + std::to_string(old_freq));
+            t->sum = t->sum - old_freq + new_freq;
+            if (t->key > key)
+                t = t->left;
+            else
+                t = t->right;
+        }
+        // Fail loudly instead of dereferencing a null pointer.
+        VERIFY_MSG(t != nullptr, "SetFreq: key is not present in the treap");
+        t->freq = new_freq;
+        // t->sum = t->sum - old_freq + new_freq;
+        TreapNode::Upd(t);
+    }
+
+    // Maps a cumulative value `sum` to a (key, freq) pair. At every node:
+    // stop if sum >= sum_left + sum_right, go left if sum < sum_left,
+    // otherwise go right with sum reduced by sum_left.
+    std::pair LowerBound(FreqType sum) const {
+        TreapNodePtr t = root;
+        FreqType sum_left, sum_right;
+
+        while(true) {
+            VERIFY_MSG(t != nullptr, std::string("Asked Sum: ") + std::to_string(sum) +
+                       " Full Sum: " + std::to_string(Sum()));
+            sum_left = TreapNode::Sum(t->left);
+            sum_right = TreapNode::Sum(t->right);
+
+            if (sum_left + sum_right <= sum)
+                break;
+
+            if (sum_left > sum )
+                t = t->left;
+            else {
+                t = t->right;
+                sum -= sum_left;
+            }
+        }
+        return { t->key, t->freq };
+    }
+
+    // Total frequency of all stored keys.
+    FreqType Sum() const {
+        return TreapNode::Sum(root);
+    }
+
+    size_t Size() const { return treap_size; }
+
+    void Check() const {
+        Check(root);
+    }
+};
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/tree_creator/exporters.cpp b/src/ig_simulator/clonal_trees/tree_creator/exporters.cpp
new file mode 100644
index 00000000..f158a9d4
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/exporters.cpp
@@ -0,0 +1,52 @@
+//
+// Created by Andrew Bzikadze on 4/14/17.
+//
+
+#include "exporters.hpp"
+
+namespace ig_simulator {
+
+// Writes every sequence of `tree` to `full` as a two-line record
+// (">forest_<f>_tree_<t>_antibody_<i>" followed by the sequence) and repeats
+// the record in `included` for nodes that are still marked as included.
+void TreeExporter(const Tree& tree, size_t forest_ind, size_t tree_ind,
+                  std::ostream& full, std::ostream& included)
+{
+    // Bind by reference: Sequences() returns a const reference, and plain
+    // `auto` would copy every sequence of the tree.
+    const auto& sequences = tree.Sequences();
+    for (size_t i = 0; i < sequences.size(); ++i) {
+        std::stringstream id_ss;
+        id_ss << ">forest_" << forest_ind << "_tree_" << tree_ind << "_antibody_" << i;
+        std::string id { id_ss.str() };
+        full << id << '\n' << sequences[i] << '\n';
+        if (tree.IsNodeIncluded(i)) {
+            included << id << '\n' << sequences[i] << '\n';
+        }
+    }
+}
+
+// Exports all trees of one forest; the tree index is its position in Trees().
+void ForestExporter(const Forest& forest, size_t forest_ind, std::ostream& full, std::ostream& included) {
+    for (size_t i = 0; i < forest.Trees().size(); ++i) {
+        TreeExporter(forest.Trees()[i], forest_ind, i, full, included);
+    }
+}
+
+// Exports all forests; the forest index is its position in the storage.
+void ForestStorageExporter(const ForestStorage& forest_storage, std::ostream& full, std::ostream& included) {
+    for (size_t i = 0; i < forest_storage.size(); ++i) {
+        ForestExporter(forest_storage[i], i, full, included);
+    }
+}
+
+// Creates <output_dir>/<trees_dir> and writes one file
+// "forest_<i>_tree_<j>.dot" per tree using operator<<(std::ostream&, const Tree&).
+// NOTE(review): the result of opening the file is not checked.
+void EdgeListsExporters(const ForestStorage& forest_storage, const IgSimulatorConfig::IOParams::OutputParams& config) {
+    std::string path = path::append_path(config.output_dir, config.trees_dir);
+    path::make_dir(path);
+    for (size_t i = 0; i < forest_storage.size(); ++i) {
+        for (size_t j = 0; j < forest_storage[i].Size(); ++j) {
+            std::stringstream filename;
+            filename << "forest_" << i << "_tree_" << j << ".dot";
+            std::string full_filename = path::append_path(path, filename.str());
+            std::ofstream out;
+            out.open(full_filename);
+            out << forest_storage[i].Trees()[j];
+            out.close();
+        }
+    }
+}
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/tree_creator/exporters.hpp b/src/ig_simulator/clonal_trees/tree_creator/exporters.hpp
new file mode 100644
index 00000000..c6c5b3a2
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/exporters.hpp
@@ -0,0 +1,20 @@
+//
+// Created by Andrew Bzikadze on 4/14/17.
+//
+
+#pragma once
+
+#include
+#include
+#include
+#include "ig_simulator_config.hpp"
+
+namespace ig_simulator {
+
+// Sequence exporters: each one writes all sequences to `full` and the
+// sequences of still-included nodes to `included`.
+void TreeExporter(const Tree& tree, size_t forest_ind, size_t tree_ind, std::ostream& full, std::ostream& included);
+void ForestExporter(const Forest& forest, size_t forest_ind, std::ostream& full, std::ostream& included);
+void ForestStorageExporter(const ForestStorage& forest_storage, std::ostream& full, std::ostream& included);
+
+// Writes one .dot file per tree into the trees directory named by `config`.
+void EdgeListsExporters(const ForestStorage& forest_storage, const IgSimulatorConfig::IOParams::OutputParams& config);
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/tree_creator/forest_creator.cpp b/src/ig_simulator/clonal_trees/tree_creator/forest_creator.cpp
new file mode 100644
index 00000000..3c94902d
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/forest_creator.cpp
@@ -0,0 +1,9 @@
+//
+// Created by Andrew Bzikadze on 4/14/17.
+//
+
+#include "forest_creator.hpp"
+
+namespace ig_simulator {
+
+} // End namespace ig_simulator
\ No newline at end of file
diff --git a/src/ig_simulator/clonal_trees/tree_creator/forest_creator.hpp b/src/ig_simulator/clonal_trees/tree_creator/forest_creator.hpp
new file mode 100644
index 00000000..6835f0f3
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/forest_creator.hpp
@@ -0,0 +1,40 @@
+//
+// Created by Andrew Bzikadze on 4/14/17.
+//
+
+#pragma once
+
+#include "tree_creator.hpp"
+#include "clonal_trees/forest/forest.hpp"
+#include "base_repertoire/metaroot_cluster/metaroot_cluster.hpp"
+
+namespace ig_simulator {
+
+// Builds a Forest for a metaroot cluster by running the TreeCreator once per
+// unit of the cluster's multiplicity.
+class ForestCreator {
+private:
+    const TreeCreator tree_creator;
+
+public:
+    ForestCreator(const vj_finder::VJFinderConfig& vjf_config,
+                  const ClonalTreeSimulatorParams& config):
+        tree_creator(vjf_config, config)
+    { }
+
+    ForestCreator(const ForestCreator&) = delete;
+    ForestCreator(ForestCreator&&) = delete;
+    ForestCreator& operator=(const ForestCreator&) = delete;
+    ForestCreator& operator=(ForestCreator&&) = delete;
+
+    // Generates root.Multiplicity() trees from the cluster's metaroot.
+    // The returned Forest stores the address of `root`, so `root` has to
+    // outlive it.
+    template
+    Forest GenerateForest(const MetarootCluster& root) const {
+        std::vector trees;
+        trees.reserve(root.Multiplicity());
+        for(size_t i = 0; i < root.Multiplicity(); ++i) {
+            Tree tree { tree_creator.GenerateTree(root.MetarootPtr().get()) };
+            trees.emplace_back(std::move(tree));
+        }
+        return Forest(&root, std::move(trees));
+    }
+};
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/tree_creator/forest_storage_creator.cpp b/src/ig_simulator/clonal_trees/tree_creator/forest_storage_creator.cpp
new file mode 100644
index 00000000..9e506faf
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/forest_storage_creator.cpp
@@ -0,0 +1,5 @@
+//
+// Created by Andrew Bzikadze on 4/14/17.
+//
+
+#include "forest_storage_creator.hpp"
diff --git a/src/ig_simulator/clonal_trees/tree_creator/forest_storage_creator.hpp b/src/ig_simulator/clonal_trees/tree_creator/forest_storage_creator.hpp
new file mode 100644
index 00000000..3fa62550
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/forest_storage_creator.hpp
@@ -0,0 +1,37 @@
+//
+// Created by Andrew Bzikadze on 4/14/17.
+//
+
+#pragma once
+
+#include "forest_creator.hpp"
+#include "base_repertoire/base_repertoire.hpp"
+
+namespace ig_simulator {
+
+// Builds a ForestStorage: one Forest per cluster of a base repertoire.
+class ForestStorageCreator {
+private:
+    const ForestCreator forest_creator;
+
+public:
+    ForestStorageCreator(const vj_finder::VJFinderConfig& vjf_config,
+                         const ClonalTreeSimulatorParams& config):
+        forest_creator(vjf_config, config)
+    { }
+
+    ForestStorageCreator(const ForestStorageCreator&) = delete;
+    ForestStorageCreator(ForestStorageCreator&&) = delete;
+    ForestStorageCreator& operator=(const ForestStorageCreator&) = delete;
+    ForestStorageCreator& operator=(ForestStorageCreator&&) = delete;
+
+    // Iterates over the repertoire and appends the forest generated for
+    // every cluster, in iteration order.
+    template
+    ForestStorage GenerateForest(const BaseRepertoire& repertoire) const {
+        ForestStorage storage;
+        for(const auto& cluster : repertoire) {
+            storage.emplace_back(forest_creator.GenerateForest(cluster));
+        }
+        return storage;
+    }
+};
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/tree_creator/pool_manager.cpp b/src/ig_simulator/clonal_trees/tree_creator/pool_manager.cpp
new file mode 100644
index 00000000..d0b67309
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/pool_manager.cpp
@@ -0,0 +1,70 @@
+//
+// Created by Andrew Bzikadze on 4/9/17.
+//
+
+#include
+#include "pool_manager.hpp"
+
+namespace ig_simulator {
+
+// All three strategies follow the same steps:
+//   1. draw a value in [1, pool.Sum()] and map it to a pool entry,
+//   2. add n_insert fresh indices (max_index, max_index + 1, ...) to the pool,
+//   3. draw from ret_to_pool_distr whether the chosen index stays in the pool.
+// They differ in the frequencies given to the new and to the kept entries.
+// The result is { chosen index, whether it stayed in the pool }.
+
+// Uniform: every entry has frequency 1 and keeps it.
+std::pair UniformPoolManager::GetIndex(size_t n_insert) {
+    size_t raw_index = random_index(1, pool.Sum());
+    size_t index, freq;
+    std::tie(index, freq) = pool.LowerBound(raw_index);
+    VERIFY(freq == 1);
+
+    for (size_t i = 0; i < n_insert; ++i) {
+        pool.Insert(max_index++, 1);
+    }
+
+    bool ret_to_pool = ret_to_pool_distr(MTSingleton::GetInstance());
+    if (not ret_to_pool) {
+        pool.Erase(index, freq);
+    }
+    return { index, ret_to_pool };
+}
+
+// Wide: new entries get frequency 1; a kept entry has its frequency raised by 1.
+std::pair WideTreePoolManager::GetIndex(size_t n_insert) {
+    size_t raw_index = random_index(1, pool.Sum());
+    size_t index, freq;
+    std::tie(index, freq) = pool.LowerBound(raw_index);
+
+    for (size_t i = 0; i < n_insert; ++i) {
+        pool.Insert(max_index++, 1);
+    }
+
+    bool ret_to_pool = ret_to_pool_distr(MTSingleton::GetInstance());
+    if (ret_to_pool) {
+        pool.SetFreq(index, freq, freq + 1);
+    } else {
+        pool.Erase(index, freq);
+    }
+    return { index, ret_to_pool };
+}
+
+// Deep: new entries and a kept entry all get new_freq, which is larger than
+// the frequency of the chosen entry.
+std::pair DeepTreePoolManager::GetIndex(size_t n_insert) {
+    size_t raw_index = random_index(1, pool.Sum());
+    size_t index, freq;
+    std::tie(index, freq) = pool.LowerBound(raw_index);
+
+    // new_freq = (freq + 1) plus either half of that value or its square
+    // root, depending on how freq compares with the numeric limit below.
+    size_t new_freq = freq + 1;
+    if (freq < std::numeric_limits::max()) {
+        new_freq += static_cast(static_cast(new_freq) * 0.5);
+    } else {
+        new_freq += static_cast(sqrt(static_cast(new_freq)));
+    }
+
+    for (size_t i = 0; i < n_insert; ++i) {
+        pool.Insert(max_index++, new_freq);
+    }
+
+    bool ret_to_pool = ret_to_pool_distr(MTSingleton::GetInstance());
+    if (ret_to_pool) {
+        pool.SetFreq(index, freq, new_freq);
+    } else {
+        pool.Erase(index, freq);
+    }
+    return { index, ret_to_pool };
+}
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/tree_creator/pool_manager.hpp b/src/ig_simulator/clonal_trees/tree_creator/pool_manager.hpp
new file mode 100644
index 00000000..11a53bcf
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/pool_manager.hpp
@@ -0,0 +1,74 @@
+//
+// Created by Andrew Bzikadze on 4/9/17.
+//
+
+#pragma once
+
+#include
+#include "cartesian_tree.hpp"
+#include "simulation_routines.hpp"
+#include "ig_simulator_utils.hpp"
+
+namespace ig_simulator {
+
+// Base class of the strategies that pick the next node to receive children.
+// The pool is a Treap of node indices with frequencies; it starts with index 0
+// at frequency 1, and max_index is the next index that will be inserted.
+class AbstractPoolManager {
+protected:
+    Treap<> pool;
+    mutable std::bernoulli_distribution ret_to_pool_distr;
+    size_t max_index;
+
+public:
+    // @param ret_prob -- probability that a chosen index stays in the pool
+    //                    (validated by check_probability)
+    AbstractPoolManager(double ret_prob):
+        pool(),
+        ret_to_pool_distr(check_probability(ret_prob)),
+        max_index(1)
+    {
+        pool.Insert(0, 1);
+    }
+
+    AbstractPoolManager(const AbstractPoolManager&) = delete;
+    AbstractPoolManager(AbstractPoolManager&&) = delete;
+    AbstractPoolManager& operator=(const AbstractPoolManager&) = delete;
+    AbstractPoolManager& operator=(AbstractPoolManager&&) = delete;
+
+    // Polymorphic base: a derived manager may be destroyed through a pointer
+    // to this class (see AbstractPoolManagerCPtr), which requires a virtual
+    // destructor.
+    virtual ~AbstractPoolManager() { }
+
+    size_t MaxIndex() const { return max_index; }
+    // Removes an index from the pool regardless of its frequency.
+    void Erase(size_t index) {
+        VERIFY(index < max_index);
+        pool.Erase(index);
+    }
+
+    // Number of indices currently in the pool.
+    size_t Size() const { return pool.Size(); }
+    // Chooses an index, adds n_insert new indices to the pool and returns
+    // { chosen index, whether it stayed in the pool }.
+    virtual std::pair GetIndex(size_t n_insert) = 0;
+};
+
+using AbstractPoolManagerCPtr = std::unique_ptr;
+
+
+class UniformPoolManager final : public AbstractPoolManager {
+public:
+    UniformPoolManager(double ret_prob):
+        AbstractPoolManager(ret_prob)
+    { }
+
+    std::pair GetIndex(size_t n_insert) override;
+};
+
+class WideTreePoolManager final : public AbstractPoolManager {
+public:
+    WideTreePoolManager(double ret_prob):
+        AbstractPoolManager(ret_prob)
+    { }
+
+    std::pair GetIndex(size_t n_insert) override;
+};
+
+class DeepTreePoolManager final : public AbstractPoolManager {
+public:
+    DeepTreePoolManager(double ret_prob):
+        AbstractPoolManager(ret_prob)
+    { }
+
+    std::pair GetIndex(size_t n_insert) override;
+};
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/tree_creator/shm_creator.cpp b/src/ig_simulator/clonal_trees/tree_creator/shm_creator.cpp
new file mode 100644
index 00000000..32b6d0b4
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/shm_creator.cpp
@@ -0,0 +1,52 @@
+//
+// Created by Andrew
Bzikadze on 4/12/17.
+//
+
+#include "shm_creator.hpp"
+#include "random_generator.hpp"
+
+namespace ig_simulator {
+
+// Draws (Poisson + 1) distinct positions in [fix_left, length - 1 - fix_right]
+// and substitutes each of them with a different nucleotide.
+// Returns one (position, old nucleotide, new nucleotide) entry per mutation.
+Node::SHM_Vector PoissonShmCreator::GenerateSHM_Vector(const std::string& seq) const {
+    size_t length = seq.length();
+    // Without this check `length - 1 - fix_right` wraps around for short
+    // sequences and the distribution below gets an invalid range.
+    VERIFY_MSG(length > fix_left + fix_right,
+               "sequence length " << length << " leaves no mutable positions");
+    const size_t n_positions = length - fix_left - fix_right;
+    std::uniform_int_distribution ind_distr(fix_left, length - 1 - fix_right);
+    size_t mut_numb = distribution(MTSingleton::GetInstance()) + 1;
+    // Positions have to be distinct, so asking for more mutations than there
+    // are mutable positions would make the loop below spin forever.
+    if (mut_numb > n_positions) {
+        mut_numb = n_positions;
+    }
+    std::vector mut_inds;
+    mut_inds.reserve(mut_numb);
+    // Rejection sampling of distinct positions.
+    while(mut_inds.size() < mut_numb) {
+        size_t ind = ind_distr(MTSingleton::GetInstance());
+        if (std::find(mut_inds.begin(), mut_inds.end(), ind) == mut_inds.end()) {
+            mut_inds.emplace_back(ind);
+        }
+    }
+
+    std::uniform_int_distribution mut_distr(0, 2);
+    Node::SHM_Vector shm_vector;
+    shm_vector.reserve(mut_numb);
+    for(const auto& mut_ind : mut_inds) {
+        seqan::Dna5 old_nucl { seq[mut_ind] };
+        size_t ind_nucl_old = old_nucl.value;
+        size_t ind_nucl_new = mut_distr(MTSingleton::GetInstance());
+        // A draw in 0..2 is shifted past the old index, so the new
+        // nucleotide is never equal to the old one.
+        seqan::Dna new_nucl { ind_nucl_new < ind_nucl_old ? ind_nucl_new : ((ind_nucl_new + 1) & 3) };
+        VERIFY_MSG(old_nucl != 'N' ? old_nucl != new_nucl : true,
+                   std::string("Old nucl: ") << old_nucl
+                   << ", New nucl: " << new_nucl
+                   << " old nucl index: " << ind_nucl_old
+                   << " new nucl index: " << ind_nucl_new
+        );
+        shm_vector.emplace_back(mut_ind, old_nucl, new_nucl);
+    }
+    return shm_vector;
+}
+
+// Factory: creates the SHM creator selected by config.method.
+AbstractShmCreatorCPtr get_shm_creator(const vj_finder::VJFinderConfig& vjf_config,
+                                       const SHM_CreatorParams& config)
+{
+    using SHM_CreatorMethod = SHM_CreatorParams::SHM_CreatorMethod;
+    if (config.method == SHM_CreatorMethod::Poisson) {
+        return std::unique_ptr(new PoissonShmCreator(vjf_config, config.poisson_params));
+    }
+    VERIFY(false);
+}
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/tree_creator/shm_creator.hpp b/src/ig_simulator/clonal_trees/tree_creator/shm_creator.hpp
new file mode 100644
index 00000000..dbb1c550
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/shm_creator.hpp
@@ -0,0 +1,62 @@
+//
+// Created by Andrew Bzikadze on 4/12/17.
+//
+
+#pragma once
+
+#include
+#include "ig_simulator_utils.hpp"
+#include "clonal_trees/tree/node.hpp"
+#include
+#include "ig_simulator_config.hpp"
+
+namespace ig_simulator {
+
+// Interface of SHM generators. fix_left / fix_right are the numbers of
+// leading / trailing positions that are never mutated; they are read from the
+// fix_crop_fill_params of the VJ finder config.
+class AbstractShmCreator {
+protected:
+    const size_t fix_left;
+    const size_t fix_right;
+
+public:
+    AbstractShmCreator() = delete;
+    AbstractShmCreator(const AbstractShmCreator&) = delete;
+    AbstractShmCreator(AbstractShmCreator&&) = delete;
+    AbstractShmCreator& operator=(const AbstractShmCreator&) = delete;
+    AbstractShmCreator& operator=(AbstractShmCreator&&) = delete;
+
+    explicit AbstractShmCreator(const vj_finder::VJFinderConfig& config):
+        fix_left(config.algorithm_params.fix_crop_fill_params.fix_left),
+        fix_right(config.algorithm_params.fix_crop_fill_params.fix_right)
+    { }
+
+    virtual ~AbstractShmCreator() { }
+
+    // Generates the mutations to apply to the given sequence.
+    virtual Node::SHM_Vector GenerateSHM_Vector(const std::string&) const = 0;
+};
+
+using AbstractShmCreatorCPtr = std::unique_ptr;
+
+
+// SHM generator whose number of mutations is (Poisson(lambda) + 1).
+class PoissonShmCreator final : public AbstractShmCreator {
+private:
+    mutable std::poisson_distribution distribution;
+
+public:
+    PoissonShmCreator(const vj_finder::VJFinderConfig& vjf_config,
+                      double lambda):
+        AbstractShmCreator(vjf_config),
+        distribution(check_numeric_positive(lambda))
+    { }
+
+    PoissonShmCreator(const vj_finder::VJFinderConfig& vjf_config,
+                      const SHM_CreatorParams::PoissonCreatorParams& config):
+        PoissonShmCreator(vjf_config, config.lambda)
+    { }
+
+
+    Node::SHM_Vector GenerateSHM_Vector(const std::string&) const override;
+};
+
+AbstractShmCreatorCPtr get_shm_creator(const vj_finder::VJFinderConfig&, const SHM_CreatorParams&);
+
+} // End namespace ig_simulator
\ No newline at end of file
diff --git a/src/ig_simulator/clonal_trees/tree_creator/tree_creator.cpp b/src/ig_simulator/clonal_trees/tree_creator/tree_creator.cpp
new file mode 100644
index 00000000..b54dd49b
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/tree_creator.cpp
@@ -0,0 +1,9 @@
+//
+// Created by Andrew Bzikadze on 4/9/17.
+//
+
+#include "tree_creator.hpp"
+
+namespace ig_simulator {
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/tree_creator/tree_creator.hpp b/src/ig_simulator/clonal_trees/tree_creator/tree_creator.hpp
new file mode 100644
index 00000000..e8506c79
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/tree_creator.hpp
@@ -0,0 +1,117 @@
+//
+// Created by Andrew Bzikadze on 4/9/17.
+//
+
+#pragma once
+
+#include "clonal_trees/tree/tree.hpp"
+#include "pool_manager.hpp"
+#include "base_repertoire/metaroot/metaroot.hpp"
+#include "shm_creator.hpp"
+#include "tree_size_generator.hpp"
+#include "clonal_trees/fast_stop_codon_checker/fast_stop_codon_checker.hpp"
+
+namespace ig_simulator {
+
+// Grows one clonal tree from a metaroot: repeatedly picks a parent from a
+// pool manager, gives it a random number of children and mutates the parent
+// sequence to obtain the sequence of every child.
+class TreeCreator {
+protected:
+    const AbstractShmCreatorCPtr shm_creator;
+    const AbstractTreeSizeGeneratorCPtr tree_size_generator;
+    const double ret_prob;
+
+    mutable std::geometric_distribution distr_n_children;
+
+private:
+    // Applies the mutations to a copy of base_seq. Every entry is
+    // (position, expected old character, new character); the old character
+    // is verified before it is overwritten.
+    std::string CreateSequence(const std::string& base_seq, const Node::SHM_Vector& shms) const {
+        std::string seq = base_seq;
+
+        for(const auto& shm : shms) {
+            VERIFY_MSG(seq[std::get<0>(shm)] == std::get<1>(shm),
+                       std::string("real seq: ") << seq <<
+                       ", position: " << std::get<0>(shm) <<
+                       ", expected: " << std::get<1>(shm));
+            seq[std::get<0>(shm)] = std::get<2>(shm);
+        }
+        return seq;
+    }
+
+public:
+    // @param shm_creator -- generator of mutations (ownership is taken)
+    // @param tree_size_generator -- generator of target tree sizes (ownership is taken)
+    // @param ret_prob -- passed to the pool manager of every generated tree
+    // @param lambda_distr_n_children -- parameter of the geometric
+    //        distribution of (number of children - 1)
+    TreeCreator(AbstractShmCreatorCPtr&& shm_creator,
+                AbstractTreeSizeGeneratorCPtr&& tree_size_generator,
+                double ret_prob,
+                double lambda_distr_n_children):
+        shm_creator(std::move(shm_creator)),
+        tree_size_generator(std::move(tree_size_generator)),
+        ret_prob(check_numeric_positive(ret_prob)),
+        distr_n_children(check_numeric_positive(lambda_distr_n_children))
+    { }
+
+    TreeCreator(const vj_finder::VJFinderConfig& vjf_config,
+                const ClonalTreeSimulatorParams& config):
+        TreeCreator(get_shm_creator(vjf_config, config.shm_creator_params),
+                    get_tree_size_generator(config.tree_size_generator_params),
+                    config.prob_ret_to_pool,
+                    config.lambda_distr_n_children)
+    { }
+
+    TreeCreator(const TreeCreator&) = delete;
+    TreeCreator(TreeCreator&&) = delete;
+    TreeCreator& operator=(const TreeCreator&) = delete;
+    TreeCreator& operator=(TreeCreator&&) = delete;
+
+    // Generates a tree rooted at `root`. A non-productive root yields a
+    // single-node tree. Otherwise nodes are added until the generated size is
+    // reached or the pool of candidate parents becomes empty.
+    template
+    Tree GenerateTree(const AbstractMetaroot* const root) const {
+        static_assert(std::is_base_of::value,
+                      "Pool Manager should be derived from @class AbstractPoolManager");
+
+        size_t tree_size = tree_size_generator->Generate();
+        std::vector nodes;
+        // Reserving up front also keeps references into `sequences` valid
+        // while children are appended below.
+        nodes.reserve(tree_size);
+        nodes.emplace_back();
+
+        std::vector sequences;
+        sequences.reserve(tree_size);
+        sequences.emplace_back(root->Sequence());
+
+        if (not root->IsProductive()) {
+            nodes.back().MakeNonProductive();
+            return Tree(root, std::move(nodes), std::move(sequences));
+        }
+
+        PoolManager pool_manager(ret_prob);
+
+        while(nodes.size() < tree_size) {
+            // Never create more children than the tree still has room for.
+            size_t n_children = distr_n_children(MTSingleton::GetInstance()) + 1;
+            n_children = std::min(n_children, tree_size - nodes.size());
+
+            size_t parent_ind;
+            bool stay;
+            std::tie(parent_ind, stay) = pool_manager.GetIndex(n_children);
+
+            // A parent that left the pool is marked as excluded.
+            if (not stay) {
+                nodes[parent_ind].Exclude();
+            }
+
+            for (size_t i = 0; i < n_children; ++i) {
+                const std::string& base_sequence = sequences[parent_ind];
+                Node::SHM_Vector shm_vector { shm_creator->GenerateSHM_Vector(base_sequence)};
+                std::string sequence = CreateSequence(base_sequence, shm_vector);
+
+                nodes.emplace_back(parent_ind, std::move(shm_vector));
+                sequences.emplace_back(std::move(sequence));
+
+                // A child with a stop codon is non-productive and must not
+                // become a parent: remove the pool index that GetIndex just
+                // inserted for it (the i-th of the last n_children indices).
+                if (FastStopCodonChecker::HasStopCodon(sequences.back(), root->CDRLabeling())) {
+                    nodes.back().MakeNonProductive();
+                    pool_manager.Erase(pool_manager.MaxIndex() - n_children + i);
+                }
+            }
+            if (pool_manager.Size() == 0) { break; } // All leafs are non-productive
+        }
+        if (pool_manager.Size() != 0) { // Only when leafs all non-productive VERIFY should not be checked.
+            VERIFY(nodes.size() == tree_size);
+        }
+        return Tree(root, std::move(nodes), std::move(sequences));
+    }
+};
+
+} // End namespace ig_simulator
diff --git a/src/ig_simulator/clonal_trees/tree_creator/tree_size_generator.cpp b/src/ig_simulator/clonal_trees/tree_creator/tree_size_generator.cpp
new file mode 100644
index 00000000..c8778217
--- /dev/null
+++ b/src/ig_simulator/clonal_trees/tree_creator/tree_size_generator.cpp
@@ -0,0 +1,17 @@
+//
+// Created by Andrew Bzikadze on 4/11/17.
+// + +#include "tree_size_generator.hpp" + +namespace ig_simulator { + +AbstractTreeSizeGeneratorCPtr get_tree_size_generator(const TreeSizeGeneratorParams& config) { + using Method = TreeSizeGeneratorParams::TreeSizeGeneratorMethod; + if (config.method == Method::Geometric) { + return AbstractTreeSizeGeneratorCPtr(new GeometricTreeSizeGenerator(config.geometric_params)); + } + VERIFY(false); +} + +} // End namespace ig_simulator diff --git a/src/ig_simulator/clonal_trees/tree_creator/tree_size_generator.hpp b/src/ig_simulator/clonal_trees/tree_creator/tree_size_generator.hpp new file mode 100644 index 00000000..95a54c7d --- /dev/null +++ b/src/ig_simulator/clonal_trees/tree_creator/tree_size_generator.hpp @@ -0,0 +1,52 @@ +// +// Created by Andrew Bzikadze on 4/11/17. +// + +#pragma once + +#include +#include + +#include "ig_simulator_utils.hpp" +#include "simulation_routines.hpp" +#include "ig_simulator_config.hpp" + +namespace ig_simulator { + +class AbstractTreeSizeGenerator { +public: + AbstractTreeSizeGenerator() = default; + AbstractTreeSizeGenerator(const AbstractTreeSizeGenerator&) = delete; + AbstractTreeSizeGenerator(AbstractTreeSizeGenerator&&) = delete; + AbstractTreeSizeGenerator& operator=(const AbstractTreeSizeGenerator&) = delete; + AbstractTreeSizeGenerator& operator=(AbstractTreeSizeGenerator&&) = delete; + + virtual size_t Generate() const = 0; + + virtual ~AbstractTreeSizeGenerator() { } +}; + +using AbstractTreeSizeGeneratorCPtr = std::unique_ptr; + +class GeometricTreeSizeGenerator final : public AbstractTreeSizeGenerator { +private: + mutable std::geometric_distribution distribution; + +public: + GeometricTreeSizeGenerator(double lambda): + AbstractTreeSizeGenerator(), + distribution(check_numeric_positive(lambda)) + { } + + GeometricTreeSizeGenerator(const TreeSizeGeneratorParams::GeometricParams& params): + GeometricTreeSizeGenerator(params.lambda) + { } + + size_t Generate() const override { + return 
distribution(MTSingleton::GetInstance()) + 1; + } +}; + +AbstractTreeSizeGeneratorCPtr get_tree_size_generator(const TreeSizeGeneratorParams& config); + +} // End namespace ig_simulator diff --git a/src/ig_simulator/ig_simulator_config.cpp b/src/ig_simulator/ig_simulator_config.cpp new file mode 100644 index 00000000..f6a4bb31 --- /dev/null +++ b/src/ig_simulator/ig_simulator_config.cpp @@ -0,0 +1,319 @@ +// +// Created by Andrew Bzikadze on 3/15/17. +// + +#include "ig_simulator_config.hpp" +#include +#include + +namespace ig_simulator { + +// IOParams start +void load(IgSimulatorConfig::IOParams::InputParams &input_params, boost::property_tree::ptree const &pt, bool) { + using config_common::load; + load(input_params.germline_input, pt, "germline_input"); + load(input_params.cdr_labeler_config_filename, pt, "cdr_labeler_config_filename"); +} + +void load(IgSimulatorConfig::IOParams::OutputParams &output_params, boost::property_tree::ptree const &pt, bool) { + using config_common::load; + load(output_params.log_filename, pt, "log_filename"); + load(output_params.output_dir, pt, "output_dir"); + load(output_params.base_repertoire_filename, pt, "base_repertoire_filename"); + load(output_params.base_repertoire_info, pt, "base_repertoire_info"); + load(output_params.filtered_pool, pt, "filtered_pool"); + load(output_params.full_pool, pt, "full_pool"); + load(output_params.trees_dir, pt, "trees_dir"); +} + +void load(IgSimulatorConfig::IOParams &io_params, boost::property_tree::ptree const &pt, bool) { + using config_common::load; + load(io_params.input_params, pt, "input_params"); + load(io_params.output_params, pt, "output_params"); +} +// IOParams end + +// SimulationParams start +void load(GeneChooserParams::CustomGeneChooserParams& custom_gene_chooser_params, + boost::property_tree::ptree const &pt, bool) +{ + using config_common::load; + load(custom_gene_chooser_params.v_genes_probs, pt, "v_genes_probs"); + load(custom_gene_chooser_params.v_genes_probs, pt, 
"d_genes_probs"); + load(custom_gene_chooser_params.v_genes_probs, pt, "j_genes_probs"); +} + + +void load(IgSimulatorConfig::SimulationParams::BaseRepertoireParams::MetarootSimulationParams + ::GeneChooserParams &gene_chooser_params, + boost::property_tree::ptree const &pt, bool) { + using config_common::load; + using GeneChooserMethod = + IgSimulatorConfig::SimulationParams::BaseRepertoireParams:: + MetarootSimulationParams::GeneChooserParams::GeneChooserMethod; + std::string method_str(pt.get("gene_chooser_method")); + std::string method_str_lowercase(method_str); + std::transform(method_str.begin(), method_str.end(), + method_str_lowercase.begin(), ::tolower); + if (method_str == "uniform") { + gene_chooser_params.method = GeneChooserMethod::Uniform; + } else if (method_str == "custom") { + gene_chooser_params.method = GeneChooserMethod::Custom; + load(gene_chooser_params.custom_gene_chooser_params, pt, "custom_chooser_params"); + } else { + VERIFY(false); + } +} + +void load(IgSimulatorConfig::SimulationParams::BaseRepertoireParams::MetarootSimulationParams + ::NucleotidesRemoverParams::UniformRemoverParams &uniform_remover_params, + boost::property_tree::ptree const &pt, bool) { + using config_common::load; + load(uniform_remover_params.max_remove_v_gene, pt, "max_remove_v_gene"); + load(uniform_remover_params.max_remove_d_gene_left, pt, "max_remove_d_gene_left"); + load(uniform_remover_params.max_remove_d_gene_right, pt, "max_remove_d_gene_right"); + load(uniform_remover_params.max_remove_j_gene, pt, "max_remove_j_gene"); +} + +void load(IgSimulatorConfig::SimulationParams::BaseRepertoireParams::MetarootSimulationParams + ::NucleotidesRemoverParams &nucleotides_remover_params, + boost::property_tree::ptree const &pt, bool) { + using config_common::load; + using NucleotidesRemoverMethod = + IgSimulatorConfig::SimulationParams::BaseRepertoireParams:: + MetarootSimulationParams::NucleotidesRemoverParams::NucleotidesRemoverMethod; + std::string 
method_str(pt.get("nucleotides_remover_method")); + std::string method_str_lowercase(method_str); + std::transform(method_str.begin(), method_str.end(), + method_str_lowercase.begin(), ::tolower); + if (method_str_lowercase == "uniform") { // compare the lower-cased copy so the match ignores case + nucleotides_remover_params.method = NucleotidesRemoverMethod::Uniform; + load(nucleotides_remover_params.uniform_remover_params, pt, "uniform_remover_params"); + } else { // any other method name is rejected + VERIFY(false); + } +} + +void load(IgSimulatorConfig::SimulationParams::BaseRepertoireParams::MetarootSimulationParams + ::PNucleotidesCreatorParams::UniformCreatorParams &uniform_creator_params, + boost::property_tree::ptree const &pt, bool) { + using config_common::load; + load(uniform_creator_params.max_create_v_gene, pt, "max_create_v_gene"); + load(uniform_creator_params.max_create_d_gene_left, pt, "max_create_d_gene_left"); + load(uniform_creator_params.max_create_d_gene_right, pt, "max_create_d_gene_right"); + load(uniform_creator_params.max_create_j_gene, pt, "max_create_j_gene"); +} + +void load(IgSimulatorConfig::SimulationParams::BaseRepertoireParams::MetarootSimulationParams + ::PNucleotidesCreatorParams &p_nucleptides_creator_params, + boost::property_tree::ptree const &pt, bool) { // Picks the P-nucleotides creator method named by "p_nucleotides_creator_method". + using config_common::load; + using PNucleotidesCreatorParams = + IgSimulatorConfig::SimulationParams::BaseRepertoireParams:: + MetarootSimulationParams::PNucleotidesCreatorParams::PNucleotidesCreatorMethod; + std::string method_str(pt.get("p_nucleotides_creator_method")); + std::string method_str_lowercase(method_str); + std::transform(method_str.begin(), method_str.end(), + method_str_lowercase.begin(), ::tolower); + if (method_str_lowercase == "uniform") { // compare the lower-cased copy so the match ignores case + p_nucleptides_creator_params.method = PNucleotidesCreatorParams::Uniform; + load(p_nucleptides_creator_params.uniform_creator_params, pt, "uniform_creator_params"); + } else { // any other method name is rejected + VERIFY(false); + } +} + +void load(IgSimulatorConfig::SimulationParams::BaseRepertoireParams::MetarootSimulationParams + 
::NNucleotidesInserterParams::UniformInserterParams &uniform_inserter_params, + boost::property_tree::ptree const &pt, bool) { + using config_common::load; + load(uniform_inserter_params.max_vj_insertion, pt, "max_vj_insertion"); + load(uniform_inserter_params.max_vd_insertion, pt, "max_vd_insertion"); + load(uniform_inserter_params.max_dj_insertion, pt, "max_dj_insertion"); +} + +void load(IgSimulatorConfig::SimulationParams::BaseRepertoireParams::MetarootSimulationParams + ::NNucleotidesInserterParams &n_nucleotides_inserter_params, + boost::property_tree::ptree const &pt, bool) { // Picks the N-nucleotides inserter method named by "n_nucleotides_method". + using config_common::load; + using NNucleotidesInserterParams = + IgSimulatorConfig::SimulationParams::BaseRepertoireParams:: + MetarootSimulationParams::NNucleotidesInserterParams::NNucleotidesInserterMethod; + std::string method_str(pt.get("n_nucleotides_method")); + std::string method_str_lowercase(method_str); + std::transform(method_str.begin(), method_str.end(), + method_str_lowercase.begin(), ::tolower); + if (method_str_lowercase == "uniform") { // compare the lower-cased copy so the match ignores case + n_nucleotides_inserter_params.method = NNucleotidesInserterParams::Uniform; + load(n_nucleotides_inserter_params.uniform_inserter_params, pt, "uniform_inserter_params"); + } else { // any other method name is rejected + VERIFY(false); + } +} + +void load(IgSimulatorConfig::SimulationParams::BaseRepertoireParams::MetarootSimulationParams + ::CleavageParams &cleavage_params, + boost::property_tree::ptree const &pt, bool) { + using config_common::load; + load(cleavage_params.prob_cleavage_v, pt, "prob_cleavage_v"); + load(cleavage_params.prob_cleavage_d_left, pt, "prob_cleavage_d_left"); + load(cleavage_params.prob_cleavage_d_right, pt, "prob_cleavage_d_right"); + load(cleavage_params.prob_cleavage_j, pt, "prob_cleavage_j"); +} + +void load(IgSimulatorConfig::SimulationParams::BaseRepertoireParams + ::MetarootSimulationParams &metaroot_simulation_params, + boost::property_tree::ptree const &pt, bool) +{ + using config_common::load; + 
load(metaroot_simulation_params.gene_chooser_params, pt, "gene_chooser_params"); + load(metaroot_simulation_params.nucleotides_remover_params, pt, "nucleotides_remover_params"); + load(metaroot_simulation_params.p_nucleotides_creator_params, pt, "p_nucleotides_creator_params"); + load(metaroot_simulation_params.n_nucleotides_inserter_params, pt, "n_nucleotides_inserter_params"); + load(metaroot_simulation_params.cleavage_params, pt, "cleavage_params"); +} + +void load(MultiplicityCreatorParams::GeometricParams &geometric_params, + boost::property_tree::ptree const &pt, bool) +{ + using config_common::load; + load(geometric_params.lambda, pt, "lambda"); +} + +void load(MultiplicityCreatorParams &multiplicity_creator_params, + boost::property_tree::ptree const &pt, bool) +{ // Picks the multiplicity creator method named by "multiplicity_method". + using config_common::load; + using MultiplicityCreatorMethod = MultiplicityCreatorParams::MultiplicityCreatorMethod; + + std::string method_str(pt.get("multiplicity_method")); + std::string method_str_lowercase(method_str); + std::transform(method_str.begin(), method_str.end(), + method_str_lowercase.begin(), ::tolower); + if (method_str_lowercase == "geometric") { // compare the lower-cased copy so the match ignores case + multiplicity_creator_params.method = MultiplicityCreatorMethod::Geometric; + load(multiplicity_creator_params.geometric_params, pt, "geometric_params"); + } else { // any other method name is rejected + VERIFY(false); + } +} + +void load(ProductiveParams &productive_params, + boost::property_tree::ptree const &pt, bool) +{ // Reads the single "productive_part" entry. + using config_common::load; + load(productive_params.productive_part, pt, "productive_part"); +} + +void load(IgSimulatorConfig::SimulationParams::BaseRepertoireParams &base_repertoire_params, + boost::property_tree::ptree const &pt, bool) +{ + using config_common::load; + load(base_repertoire_params.metaroot_simulation_params, pt, "metaroot_simulation_params"); + load(base_repertoire_params.multiplicity_creator_params, pt, "multiplicity_creator_params"); + load(base_repertoire_params.productive_params, pt, "productive_params"); + 
load(base_repertoire_params.number_of_metaroots, pt, "number_of_metaroots"); +} + +void load(TreeSizeGeneratorParams::GeometricParams &geometric_params, + boost::property_tree::ptree const &pt, bool) +{ + using config_common::load; + load(geometric_params.lambda, pt, "lambda"); +} + +void load(TreeSizeGeneratorParams &tree_size_generator_params, + boost::property_tree::ptree const &pt, bool) +{ // Picks the tree size generator method named by "tree_size_generator_method". + using config_common::load; + using TreeSizeGeneratorMethod = TreeSizeGeneratorParams::TreeSizeGeneratorMethod; + + std::string method_str(pt.get("tree_size_generator_method")); + std::string method_str_lowercase(method_str); + std::transform(method_str.begin(), method_str.end(), + method_str_lowercase.begin(), ::tolower); + if (method_str_lowercase == "geometric") { // compare the lower-cased copy so the match ignores case + tree_size_generator_params.method = TreeSizeGeneratorMethod::Geometric; + load(tree_size_generator_params.geometric_params, pt, "geometric_params"); + } else { // any other method name is rejected + VERIFY(false); + } +} + +void load(SHM_CreatorParams::PoissonCreatorParams &params, + boost::property_tree::ptree const &pt, bool) +{ + using config_common::load; + load(params.lambda, pt, "lambda"); +} + +void load(SHM_CreatorParams &shm_creator_params, + boost::property_tree::ptree const &pt, bool) { // Picks the SHM creator method named by "shm_creator_method". + using config_common::load; + using SHM_CreatorMethod = SHM_CreatorParams::SHM_CreatorMethod; + + std::string method_str(pt.get("shm_creator_method")); + std::string method_str_lowercase(method_str); + std::transform(method_str.begin(), method_str.end(), + method_str_lowercase.begin(), ::tolower); + if (method_str_lowercase == "poisson") { // compare the lower-cased copy so the match ignores case + shm_creator_params.method = SHM_CreatorMethod::Poisson; + load(shm_creator_params.poisson_params, pt, "poisson_params"); + } else { // any other method name is rejected + VERIFY(false); + } +} + +void load(ClonalTreeSimulatorParams &clonal_tree_simulator_params, + boost::property_tree::ptree const &pt, bool) +{ + using config_common::load; + using PoolManagerStrategy = ClonalTreeSimulatorParams::PoolManagerStrategy; + + std::string 
method_str(pt.get("pool_manager_strategy")); + std::string method_str_lowercase(method_str); + std::transform(method_str.begin(), method_str.end(), + method_str_lowercase.begin(), ::tolower); + if (method_str_lowercase == "uniform") { // compare the lower-cased copy so the match ignores case + clonal_tree_simulator_params.pool_manager_strategy = PoolManagerStrategy::UniformPoolManager; + } else if (method_str_lowercase == "wide") { + clonal_tree_simulator_params.pool_manager_strategy = PoolManagerStrategy::WideTreePoolManager; + } else if (method_str_lowercase == "deep") { + clonal_tree_simulator_params.pool_manager_strategy = PoolManagerStrategy::DeepTreePoolManager; + } else { // any other strategy name is rejected + VERIFY(false); + } + + load(clonal_tree_simulator_params.prob_ret_to_pool, pt, "prob_ret_to_pool"); + load(clonal_tree_simulator_params.lambda_distr_n_children, pt, "lambda_distr_n_children"); + load(clonal_tree_simulator_params.tree_size_generator_params, pt, "tree_size_generator_params"); + load(clonal_tree_simulator_params.shm_creator_params, pt, "shm_creator_params"); +} + +void load(IgSimulatorConfig::SimulationParams &simulation_params, + boost::property_tree::ptree const &pt, bool) +{ + using config_common::load; + load(simulation_params.base_repertoire_params, pt, "base_repertoire_params"); + load(simulation_params.clonal_tree_simulator_params, pt, "clonal_tree_simulator_params"); +} +// SimulationParams end + + +void load(IgSimulatorConfig &cfg, boost::property_tree::ptree const &pt, bool complete) { + using config_common::load; + load(cfg.io_params, pt, "io_params", complete); + load(cfg.simulation_params, pt, "simulation_params", complete); + load(cfg.germline_params, pt, "germline_params"); + // TODO remove this hack + cfg.simulation_params.base_repertoire_params. 
+ metaroot_simulation_params.cdr_labeler_config.load(cfg.io_params.input_params.cdr_labeler_config_filename); +} + +void load(IgSimulatorConfig &cfg, std::string const &filename) { + boost::property_tree::ptree pt; + boost::property_tree::read_info(filename, pt); + load(cfg, pt, true); +} + +} // End namespace ig_simulator diff --git a/src/ig_simulator/ig_simulator_config.hpp b/src/ig_simulator/ig_simulator_config.hpp new file mode 100644 index 00000000..64a5d10a --- /dev/null +++ b/src/ig_simulator/ig_simulator_config.hpp @@ -0,0 +1,193 @@ +// +// Created by Andrew Bzikadze on 3/15/17. +// + +#pragma once + +#include "io/library.hpp" +#include +#include "config_singl.hpp" +#include "germline_utils/germline_config.hpp" +#include "cdr_config.hpp" + +namespace ig_simulator { + +struct IgSimulatorConfig { + struct IOParams { + struct InputParams { + germline_utils::GermlineInput germline_input; + std::string cdr_labeler_config_filename; + }; + + struct OutputParams { + std::string output_dir; + std::string log_filename; + std::string base_repertoire_filename; + std::string base_repertoire_info; + std::string filtered_pool; + std::string full_pool; + std::string trees_dir; + }; + + InputParams input_params; + OutputParams output_params; + }; + + + struct SimulationParams { + struct BaseRepertoireParams { + struct MetarootSimulationParams { + struct GeneChooserParams { + struct CustomGeneChooserParams { + std::string v_genes_probs; + std::string d_genes_probs; + std::string j_genes_probs; + }; + + enum class GeneChooserMethod { Uniform, Custom }; + GeneChooserMethod method; + CustomGeneChooserParams custom_gene_chooser_params; + }; + + struct NucleotidesRemoverParams { + enum class NucleotidesRemoverMethod { Uniform }; + struct UniformRemoverParams { + size_t max_remove_v_gene; + size_t max_remove_d_gene_left; + size_t max_remove_d_gene_right; + size_t max_remove_j_gene; + }; + NucleotidesRemoverMethod method; + UniformRemoverParams uniform_remover_params; + }; + + 
struct PNucleotidesCreatorParams { + enum class PNucleotidesCreatorMethod { Uniform }; + struct UniformCreatorParams { + size_t max_create_v_gene; + size_t max_create_d_gene_left; + size_t max_create_d_gene_right; + size_t max_create_j_gene; + }; + PNucleotidesCreatorMethod method; + UniformCreatorParams uniform_creator_params; + }; + + struct NNucleotidesInserterParams { + enum class NNucleotidesInserterMethod { Uniform }; + struct UniformInserterParams { + size_t max_vj_insertion; + size_t max_vd_insertion; + size_t max_dj_insertion; + }; + NNucleotidesInserterMethod method; + UniformInserterParams uniform_inserter_params; + }; + + struct CleavageParams { + double prob_cleavage_v; + double prob_cleavage_d_left; + double prob_cleavage_d_right; + double prob_cleavage_j; + }; + + GeneChooserParams gene_chooser_params; + NucleotidesRemoverParams nucleotides_remover_params; + PNucleotidesCreatorParams p_nucleotides_creator_params; + NNucleotidesInserterParams n_nucleotides_inserter_params; + CleavageParams cleavage_params; + cdr_labeler::CDRLabelerConfig cdr_labeler_config; + }; + + struct MultiplicityCreatorParams { + struct GeometricParams { + double lambda; + }; + + enum class MultiplicityCreatorMethod { Geometric }; + MultiplicityCreatorMethod method; + GeometricParams geometric_params; + }; + + struct ProductiveParams { + double productive_part; + }; + + MetarootSimulationParams metaroot_simulation_params; + MultiplicityCreatorParams multiplicity_creator_params; + ProductiveParams productive_params; + + size_t number_of_metaroots; + }; + + struct ClonalTreeSimulatorParams { + struct TreeSizeGeneratorParams { + struct GeometricParams { + double lambda; + }; + + enum class TreeSizeGeneratorMethod { Geometric }; + TreeSizeGeneratorMethod method; + GeometricParams geometric_params; + }; + + struct SHM_CreatorParams { + struct PoissonCreatorParams { + double lambda; + }; + + enum class SHM_CreatorMethod { Poisson }; + SHM_CreatorMethod method; + PoissonCreatorParams 
poisson_params; + }; + + enum class PoolManagerStrategy { UniformPoolManager, WideTreePoolManager, DeepTreePoolManager }; + PoolManagerStrategy pool_manager_strategy; + + double prob_ret_to_pool; + double lambda_distr_n_children; + TreeSizeGeneratorParams tree_size_generator_params; + SHM_CreatorParams shm_creator_params; + }; + + BaseRepertoireParams base_repertoire_params; + ClonalTreeSimulatorParams clonal_tree_simulator_params; + }; + + IOParams io_params; + germline_utils::GermlineParams germline_params; + SimulationParams simulation_params; +}; + +using BaseRepertoireParams = IgSimulatorConfig::SimulationParams::BaseRepertoireParams; +using ClonalTreeSimulatorParams = IgSimulatorConfig::SimulationParams::ClonalTreeSimulatorParams; + +using MetarootSimulationParams = BaseRepertoireParams::MetarootSimulationParams; +using MultiplicityCreatorParams = BaseRepertoireParams::MultiplicityCreatorParams; +using ProductiveParams = BaseRepertoireParams::ProductiveParams; + +using MultiplicityCreatorMethod = MultiplicityCreatorParams::MultiplicityCreatorMethod; + +using GeneChooserParams = MetarootSimulationParams::GeneChooserParams; +using GeneChooserMethod = GeneChooserParams::GeneChooserMethod; + +using NucleotidesRemoverParams = MetarootSimulationParams::NucleotidesRemoverParams; +using NucleotidesRemoverMethod = NucleotidesRemoverParams::NucleotidesRemoverMethod; + +using PNucleotidesCreatorParams = MetarootSimulationParams::PNucleotidesCreatorParams; +using PNucleotidesCreatorMethod = PNucleotidesCreatorParams::PNucleotidesCreatorMethod; + +using NNucleotidesInserterParams = MetarootSimulationParams::NNucleotidesInserterParams; +using NNucleotidesInserterMethod = NNucleotidesInserterParams::NNucleotidesInserterMethod; + +using CleavageParams = MetarootSimulationParams::CleavageParams; + +using TreeSizeGeneratorParams = ClonalTreeSimulatorParams::TreeSizeGeneratorParams; +using SHM_CreatorParams = ClonalTreeSimulatorParams::SHM_CreatorParams; +using 
PoolManagerStrategy = ClonalTreeSimulatorParams::PoolManagerStrategy; + +void load(IgSimulatorConfig &cfg, std::string const &filename); + +typedef config_common::config igs_cfg; + +} // End namespace ig_simulator diff --git a/src/ig_simulator/ig_simulator_launch.cpp b/src/ig_simulator/ig_simulator_launch.cpp new file mode 100644 index 00000000..a4d101e9 --- /dev/null +++ b/src/ig_simulator/ig_simulator_launch.cpp @@ -0,0 +1,122 @@ +// +// Created by Andrew Bzikadze on 3/15/17. +// + +#include + +#include +#include +#include "ig_simulator_launch.hpp" +#include "base_repertoire/base_repertoire_simulator.hpp" +#include "clonal_trees/tree_creator/forest_storage_creator.hpp" + +using namespace germline_utils; + +namespace ig_simulator { + +germline_utils::ChainType IgSimulatorLaunch::GetLaunchChainType() const { + auto v_chain_type = germline_utils::LociParam::ConvertIntoChainTypes(config_.germline_params.loci); + VERIFY_MSG(v_chain_type.size() == 1, "Only specific chain type is allowed"); + return v_chain_type[0]; +} + +std::vector +IgSimulatorLaunch::GetDB(const germline_utils::ChainType chain_type) const +{ + GermlineDbGenerator db_generator(config_.io_params.input_params.germline_input, + config_.germline_params); + INFO("Generation of DB for variable segments..."); + germline_utils::CustomGeneDatabase v_db = db_generator.GenerateVariableDb(); + INFO("Generation of DB for diversity segments..."); + germline_utils::CustomGeneDatabase d_db = db_generator.GenerateDiversityDb(); + INFO("Generation of DB for join segments..."); + germline_utils::CustomGeneDatabase j_db = db_generator.GenerateJoinDb(); + + std::vector db; + db.emplace_back(std::move(v_db)); + if (chain_type.IsVDJ()) + db.emplace_back(std::move(d_db)); + db.emplace_back(std::move(j_db)); + return db; +} + +BaseRepertoire +IgSimulatorLaunch::GetBaseRepertoire(const germline_utils::ChainType chain_type, + std::vector& db) const +{ + INFO("== Base Repertoire starts =="); + BaseRepertoireSimulator 
base_repertoire_simulator{config_.simulation_params.base_repertoire_params, + chain_type, + db}; + auto base_repertoire = + base_repertoire_simulator.Simulate(config_.simulation_params.base_repertoire_params.number_of_metaroots); + std::ofstream base_repertoire_fasta; + std::ofstream base_repertoire_info; + base_repertoire_fasta.open(path::append_path(config_.io_params.output_params.output_dir, + config_.io_params.output_params.base_repertoire_filename)); + base_repertoire_info.open(path::append_path(config_.io_params.output_params.output_dir, + config_.io_params.output_params.base_repertoire_info)); + print_base_repertoire(base_repertoire, base_repertoire_fasta, base_repertoire_info); + base_repertoire_fasta.close(); + base_repertoire_info.close(); + INFO("== Base Repertoire ends =="); + return base_repertoire; +} + +template +ForestStorage IgSimulatorLaunch::__GetForestStorage(const BaseRepertoire& base_repertoire) const +{ + INFO("== Forest Storage generation starts =="); + const auto& vjf_config = config_.simulation_params.base_repertoire_params.metaroot_simulation_params. 
+ cdr_labeler_config.vj_finder_config; + ForestStorageCreator forest_storage_creator(vjf_config, + config_.simulation_params.clonal_tree_simulator_params); + auto forest_storage = forest_storage_creator.GenerateForest(base_repertoire); + INFO("== Forest Storage generation ends =="); + + INFO("== Forest Storage export starts =="); + INFO("== Full and filtered pool export start"); + std::ofstream full, included; + full.open(path::append_path(config_.io_params.output_params.output_dir, + config_.io_params.output_params.full_pool)); + included.open(path::append_path(config_.io_params.output_params.output_dir, + config_.io_params.output_params.filtered_pool)); + ForestStorageExporter(forest_storage, full, included); + full.close(); + included.close(); + INFO("== Full and filtered pool export ends"); + + INFO("== Edge lists export starts"); + EdgeListsExporters(forest_storage, config_.io_params.output_params); + INFO("== Edge lists export ends"); + INFO("== Forest Storage export ends =="); + return forest_storage; +} + +ForestStorage IgSimulatorLaunch::GetForestStorage(const BaseRepertoire& base_repertoire) const +{ + const auto& pool_manager_strategy = config_.simulation_params.clonal_tree_simulator_params.pool_manager_strategy; + if (pool_manager_strategy == PoolManagerStrategy::UniformPoolManager) { + return __GetForestStorage(base_repertoire); + } else if (pool_manager_strategy == PoolManagerStrategy::DeepTreePoolManager) { + return __GetForestStorage(base_repertoire); + } else if (pool_manager_strategy == PoolManagerStrategy::WideTreePoolManager) { + return __GetForestStorage(base_repertoire); + } + VERIFY(false); +} + +void IgSimulatorLaunch::Run() { + // MTSingleton::SetSeed(1); + INFO("== IgSimulator starts =="); + + germline_utils::ChainType chain_type = GetLaunchChainType(); + std::vector db { GetDB(chain_type) }; + + const BaseRepertoire base_repertoire = GetBaseRepertoire(chain_type, db); + const ForestStorage forest_storage = 
GetForestStorage(base_repertoire); + + INFO("== IgSimulator ends =="); +} + +} // End namespace ig_simulator diff --git a/src/ig_simulator/ig_simulator_launch.hpp b/src/ig_simulator/ig_simulator_launch.hpp new file mode 100644 index 00000000..f61323ce --- /dev/null +++ b/src/ig_simulator/ig_simulator_launch.hpp @@ -0,0 +1,47 @@ +// +// Created by Andrew Bzikadze on 3/15/17. +// + +#pragma once + +#include "ig_simulator_config.hpp" +#include "germline_utils/chain_type.hpp" +#include "base_repertoire/base_repertoire.hpp" +#include "clonal_trees/forest/forest.hpp" + +namespace ig_simulator { + +class IgSimulatorLaunch { +private: + IgSimulatorConfig config_; + +private: + germline_utils::ChainType GetLaunchChainType() const; + + std::vector + GetDB(const germline_utils::ChainType chain_type) const; + + BaseRepertoire + GetBaseRepertoire(const germline_utils::ChainType chain_type, + std::vector& db) const; + + template + ForestStorage __GetForestStorage(const BaseRepertoire& base_repertoire) const; + + ForestStorage GetForestStorage(const BaseRepertoire& base_repertoire) const; + +public: + IgSimulatorLaunch(const IgSimulatorConfig &config) : + config_(config) + { } + + void Run(); + + IgSimulatorLaunch() = delete; + IgSimulatorLaunch(const IgSimulatorLaunch&) = delete; + IgSimulatorLaunch(IgSimulatorLaunch&&) = delete; + IgSimulatorLaunch& operator=(const IgSimulatorLaunch&) = delete; + IgSimulatorLaunch& operator=(IgSimulatorLaunch&&) = delete; +}; + +} // End namespace ig_simulator diff --git a/src/ig_simulator/ig_simulator_utils.hpp b/src/ig_simulator/ig_simulator_utils.hpp new file mode 100644 index 00000000..88c0263f --- /dev/null +++ b/src/ig_simulator/ig_simulator_utils.hpp @@ -0,0 +1,38 @@ +// +// Created by Andrew Bzikadze on 3/31/17. 
+// + +#pragma once + +#include "verify.hpp" + +namespace ig_simulator { + +template +const Pointer& check_pointer(const Pointer& p) { + VERIFY(p != nullptr); + return p; +} + +template +T check_numeric_nonnegative(const T x) { + static_assert(std::is_arithmetic::value, "Type has to be arithmetic"); + VERIFY(x >= 0); + return x; +} + +template +T check_numeric_positive(const T x) { + static_assert(std::is_arithmetic::value, "Type has to be arithmetic"); + VERIFY(x > 0); + return x; +} + +template +T check_probability(const T x) { + static_assert(std::is_floating_point::value, "Probability is floating point"); + VERIFY(x >= 0 and x <= 1); + return x; +} + +} // End namespace ig_simulator diff --git a/src/ig_simulator/main.cpp b/src/ig_simulator/main.cpp new file mode 100644 index 00000000..df8bc959 --- /dev/null +++ b/src/ig_simulator/main.cpp @@ -0,0 +1,73 @@ +#include +#include +#include + +#include + +#include "ig_simulator_config.hpp" +#include "ig_simulator_launch.hpp" + +void create_console_logger(std::string cfg_filename) { + using namespace logging; + std::string log_props_file = ig_simulator::igs_cfg::get().io_params.output_params.log_filename; + if (!path::FileExists(log_props_file)){ + log_props_file = path::append_path(path::parent_path(cfg_filename), log_props_file); + } + logger *lg = create_logger(path::FileExists(log_props_file) ? 
log_props_file : ""); + lg->add_writer(std::make_shared()); + attach_logger(lg); +} + +std::string running_time_format(const perf_counter &pc) { + unsigned ms = (unsigned)pc.time_ms(); + unsigned secs = (ms / 1000) % 60; + unsigned mins = (ms / 1000 / 60) % 60; + unsigned hours = (ms / 1000 / 60 / 60); + boost::format bf("%u hours %u minutes %u seconds"); + bf % hours % mins % secs; + return bf.str(); +} + +void prepare_output_dir(const ig_simulator::IgSimulatorConfig::IOParams::OutputParams & of) { + path::make_dir(of.output_dir); +} + +void copy_configs(std::string cfg_filename, std::string to) { + path::make_dir(to); + path::copy_files_by_ext(path::parent_path(cfg_filename), to, ".info", true); + path::copy_files_by_ext(path::parent_path(cfg_filename), to, ".properties", true); +} + +std::string get_config_fname(int argc, char **argv) { + if(argc == 2) + return std::string(argv[1]); + return "configs/ig_simulator/config.info"; +} + +std::string load_config(int argc, char **argv) { + std::string cfg_filename = get_config_fname(argc, argv); + if (!path::FileExists(cfg_filename)) { + std::cout << "File " << cfg_filename << " doesn't exist or can't be read!" 
<< std::endl; + exit(-1); + } + ig_simulator::igs_cfg::create_instance(cfg_filename); + prepare_output_dir(ig_simulator::igs_cfg::get().io_params.output_params); + std::string path_to_copy = + path::append_path(ig_simulator::igs_cfg::get().io_params.output_params.output_dir, "configs"); + path::make_dir(path_to_copy); + copy_configs(cfg_filename, path_to_copy); + return cfg_filename; +} + +int main(int argc, char **argv) { + omp_set_num_threads(1); + + segfault_handler sh; + perf_counter pc; + std::string cfg_filename = load_config(argc, argv); + create_console_logger(cfg_filename); + // variable extracted to avoid a possible bug in gcc 4.8.4 + const auto& cfg = ig_simulator::igs_cfg::get(); + ig_simulator::IgSimulatorLaunch(cfg).Run(); + return 0; +} diff --git a/src/ig_simulator/random_generator.hpp b/src/ig_simulator/random_generator.hpp new file mode 100644 index 00000000..52216315 --- /dev/null +++ b/src/ig_simulator/random_generator.hpp @@ -0,0 +1,35 @@ +// +// Created by Andrew Bzikadze on 3/17/17. +// + +#pragma once + +#include + +namespace ig_simulator { + +// This code is written after consulting with @eodus +template +class RandomGeneratorSingleton { +private: + STLRandomGenerator generator_; + +private: + RandomGeneratorSingleton(Sseq seed=std::random_device()()) : + generator_(seed) + { } + +public: + static void SetSeed(Sseq seed = std::random_device()()) { + RandomGeneratorSingleton::GetInstance().seed(seed); + } + + static STLRandomGenerator& GetInstance() { + static RandomGeneratorSingleton rg; + return rg.generator_; + } +}; + +using MTSingleton = RandomGeneratorSingleton; + +} // End namespace ig_simulator diff --git a/src/ig_simulator/simulation_routines.cpp b/src/ig_simulator/simulation_routines.cpp new file mode 100644 index 00000000..58578ba2 --- /dev/null +++ b/src/ig_simulator/simulation_routines.cpp @@ -0,0 +1,21 @@ +// +// Created by Andrew Bzikadze on 3/20/17. 
+// + +#include "simulation_routines.hpp" + +namespace ig_simulator { + +size_t random_index(size_t low, size_t high) { // draws one value from uniform_int_distribution(low, high) using the MTSingleton engine + std::uniform_int_distribution d(low, high); + return d(MTSingleton::GetInstance()); +} + +template +double uniform_floating_point(FloatingPoint low, FloatingPoint high) { // draws one value from uniform_real_distribution(low, high) using the MTSingleton engine + static_assert(std::is_floating_point::value, "Type has to be floating point"); + std::uniform_real_distribution d(low, high); + return d(MTSingleton::GetInstance()); +} + +} // End namespace ig_simulator diff --git a/src/ig_simulator/simulation_routines.hpp b/src/ig_simulator/simulation_routines.hpp new file mode 100644 index 00000000..cab69fff --- /dev/null +++ b/src/ig_simulator/simulation_routines.hpp @@ -0,0 +1,18 @@ +// +// Created by Andrew Bzikadze on 3/20/17. +// + +#pragma once + +#include +#include +#include "random_generator.hpp" + +namespace ig_simulator { + +size_t random_index(size_t low = 0, size_t high = std::numeric_limits::max()); + +template +double uniform_floating_point(FloatingPoint low = 0., FloatingPoint high = 1.); // defaults draw from [0, 1); the previous high = 0. gave an empty [0, 0) range + +} // End namespace ig_simulator diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index f8abc2a0..39bbd75d 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -5,6 +5,7 @@ include_directories(${VDJ_UTILS_DIR}) include_directories(${ALGORITHMS_DIR}) include_directories(${VJ_FINDER_DIR}) include_directories(${CDR_LABELER_DIR}) +include_directories(${IG_SIMULATOR_DIR}) link_libraries(graph_utils vdj_utils algorithms core input ${COMMON_LIBRARIES}) @@ -23,3 +24,6 @@ target_link_libraries(test_cdr_labeling cdr_labeler_library) make_test(test_vj_finder test_vj_finder.cpp) target_link_libraries(test_vj_finder vj_finder_library) + +make_test(test_ig_simulator test_ig_simulator.cpp) +target_link_libraries(test_ig_simulator ig_simulator_library) diff --git a/src/test/test_cdr_labeler.cpp b/src/test/test_cdr_labeler.cpp index 71c83592..d6edc741 100644 --- a/src/test/test_cdr_labeler.cpp +++ 
b/src/test/test_cdr_labeler.cpp @@ -4,7 +4,7 @@ #include #include -#include +#include #include #include #include @@ -28,8 +28,8 @@ class CDRLabelerTest : public ::testing::Test { std::string config_fname = "configs/cdr_labeler/config.info"; config.load(config_fname); config.vj_finder_config.algorithm_params.germline_params.loci = "IG"; - vj_finder::GermlineDbGenerator db_generator(config.vj_finder_config.io_params.input_params.germline_input, - config.vj_finder_config.algorithm_params.germline_params); + germline_utils::GermlineDbGenerator db_generator(config.vj_finder_config.io_params.input_params.germline_input, + config.vj_finder_config.algorithm_params.germline_params); auto v_gene_database = db_generator.GenerateVariableDb(); auto j_gene_database = db_generator.GenerateJoinDb(); auto v_labeling = cdr_labeler::GermlineDbLabeler(v_gene_database, config.cdrs_params).ComputeLabeling(); diff --git a/src/test/test_ig_simulator.cpp b/src/test/test_ig_simulator.cpp new file mode 100644 index 00000000..1e50a7cc --- /dev/null +++ b/src/test/test_ig_simulator.cpp @@ -0,0 +1,302 @@ +// +// Created by Andrew Bzikadze on 3/23/17. 
+// + +#include +#include + +#include + +#include +#include +#include +#include +#include "base_repertoire/metaroot/metaroot.hpp" +#include +#include "convert.hpp" + +#include "base_repertoire/gene_chooser/uniform_gene_chooser.hpp" +#include "base_repertoire/nucleotides_remover/uniform_nucleotides_remover.hpp" +#include "base_repertoire/p_nucleotides_creator/uniform_nucleotides_creator.hpp" +#include "base_repertoire/n_nucleotides_inserter/uniform_n_nucleotides_inserter.hpp" +#include "base_repertoire/metaroot_creator/metaroot_creator.hpp" +#include "annotation_utils/cdr_labeling_primitives.hpp" +#include "base_repertoire/productivity_checker/productivity_checker.hpp" +#include "annotation_utils/aa_annotation/aa_calculator.hpp" + +#include +#include + +void create_console_logger() { + using namespace logging; + logger *lg = create_logger(""); + lg->add_writer(std::make_shared()); + attach_logger(lg); +} + +ig_simulator::IgSimulatorConfig config; +germline_utils::CustomGeneDatabase v_db(germline_utils::SegmentType::VariableSegment); +germline_utils::CustomGeneDatabase d_db(germline_utils::SegmentType::DiversitySegment); +germline_utils::CustomGeneDatabase j_db(germline_utils::SegmentType::JoinSegment); + +namespace ig_simulator { + +class IgSimulatorTest: public ::testing::Test { +public: + void SetUp() { + omp_set_num_threads(1); + create_console_logger(); + std::string config_fname = "configs/ig_simulator/config.info"; + ig_simulator::load(config, config_fname); + config.germline_params.loci = "IGH"; + + germline_utils::GermlineDbGenerator db_generator(config.io_params.input_params.germline_input, + config.germline_params); + v_db = db_generator.GenerateVariableDb(); + d_db = db_generator.GenerateDiversityDb(); + j_db = db_generator.GenerateJoinDb(); + } +}; + +TEST_F(IgSimulatorTest, PrepareGeneTest) { + { + seqan::Dna5String gene("GTACAACTGGAACG"); + AbstractMetaroot::PrepareGene(gene, 0, 1); + ASSERT_EQ(core::seqan_string_to_string(gene), "GTACAACTGGAAC"); + 
AbstractMetaroot::PrepareGene(gene, 2, 1); + ASSERT_EQ(core::seqan_string_to_string(gene), "ACAACTGGAA"); + AbstractMetaroot::PrepareGene(gene, 5, 3); + ASSERT_EQ(core::seqan_string_to_string(gene), "TG"); + AbstractMetaroot::PrepareGene(gene, -2, -2); + ASSERT_EQ(core::seqan_string_to_string(gene), "CATGCA"); + AbstractMetaroot::PrepareGene(gene, -3, 2); + ASSERT_EQ(core::seqan_string_to_string(gene), "ATGCATG"); + } +} + +TEST_F(IgSimulatorTest, VDJMetarootSequenceCorrect) { + { + std::string vd_ins("ACCGT"); + std::string dj_ins("TTTT"); + VDJMetaroot root(&v_db, &d_db, &j_db, + 0, 0, 0, + annotation_utils::CDRLabeling(), + 5, 1, 2, 3, + vd_ins, dj_ins); + std::string correct_root_seq( + std::string( + "CAGGTTCAGCTGGTGCAGTCTGGAGCTGAGGTGAAGAAGCCTGGGGCCTCAGTGAAGGTCTCCTGCAAGGCTTCTGGTTACACCTTT" + "ACCAGCTATGGTATCAGCTGGGTGCGACAGGCCCCTGGACAAGGGCTTGAGTGGATGGGATGGATCAGCGCTTACAATGGTAACACA" + "AACTATGCACAGAAGCTCCAGGGCAGAGTCACCATGACCACAGACACATCCACGAGCACAGCCTACATGGAGCTGAGGAGCCTGAGA" + "TCTGACGACACGGCCGTGTATTACTGTGCG") + + vd_ins + + "GTACAACTGGAACG" + + dj_ins + + "GAATACTTCCAGCACTGGGGCCAGGGCACCCTGGTCACCGTCTCCTCAG"); + ASSERT_EQ(correct_root_seq, core::dna5String_to_string(root.Sequence())); + } + + { + std::string vd_ins("ACCGT"); + std::string dj_ins("TTTT"); + VDJMetaroot root(&v_db, &d_db, &j_db, + 0, 0, 0, + annotation_utils::CDRLabeling(), + -5, -2, -2, -3, + vd_ins, dj_ins); + std::string correct_root_seq( + std::string( + "CAGGTTCAGCTGGTGCAGTCTGGAGCTGAGGTGAAGAAGCCTGGGGCCTCAGTGAAGGTCTCCTGCAAGGCTTCTGGTTACACCTTT" + "ACCAGCTATGGTATCAGCTGGGTGCGACAGGCCCCTGGACAAGGGCTTGAGTGGATGGGATGGATCAGCGCTTACAATGGTAACACA" + "AACTATGCACAGAAGCTCCAGGGCAGAGTCACCATGACCACAGACACATCCACGAGCACAGCCTACATGGAGCTGAGGAGCCTGAGA" + "TCTGACGACACGGCCGTGTATTACTGTGCGAGAGA") + + "TCTCT" + + vd_ins + + "CC" + + "GGTACAACTGGAACGAC" + + "GT" + + dj_ins + + "AGC" + + "GCTGAATACTTCCAGCACTGGGGCCAGGGCACCCTGGTCACCGTCTCCTCAG"); + ASSERT_EQ(correct_root_seq, core::dna5String_to_string(root.Sequence())); + } + { + 
std::string vd_ins("ACCGT"); + std::string dj_ins("TTTT"); + VDJMetaroot root(&v_db, &d_db, &j_db, + 0, 0, 0, + annotation_utils::CDRLabeling(), + -5, 0, -3, -3, + vd_ins, dj_ins); + std::string correct_root_seq( + std::string( + "CAGGTTCAGCTGGTGCAGTCTGGAGCTGAGGTGAAGAAGCCTGGGGCCTCAGTGAAGGTCTCCTGCAAGGCTTCTGGTTACACCTTT" + "ACCAGCTATGGTATCAGCTGGGTGCGACAGGCCCCTGGACAAGGGCTTGAGTGGATGGGATGGATCAGCGCTTACAATGGTAACACA" + "AACTATGCACAGAAGCTCCAGGGCAGAGTCACCATGACCACAGACACATCCACGAGCACAGCCTACATGGAGCTGAGGAGCCTGAGA" + "TCTGACGACACGGCCGTGTATTACTGTGCGAGAGA") + + "TCTCT" + + vd_ins + + "GGTACAACTGGAACGAC" + + "GTC" + + dj_ins + + "AGC" + + "GCTGAATACTTCCAGCACTGGGGCCAGGGCACCCTGGTCACCGTCTCCTCAG"); + ASSERT_EQ(correct_root_seq, core::dna5String_to_string(root.Sequence())); + } + + { + std::string vd_ins("ACCGT"); + std::string dj_ins("TTTT"); + VDJMetaroot root(&v_db, &d_db, &j_db, + 0, 0, 0, + annotation_utils::CDRLabeling(), + 0, 0, 0, 0, + vd_ins, dj_ins); + std::string correct_root_seq( + std::string( + "CAGGTTCAGCTGGTGCAGTCTGGAGCTGAGGTGAAGAAGCCTGGGGCCTCAGTGAAGGTCTCCTGCAAGGCTTCTGGTTACACCTTT" + "ACCAGCTATGGTATCAGCTGGGTGCGACAGGCCCCTGGACAAGGGCTTGAGTGGATGGGATGGATCAGCGCTTACAATGGTAACACA" + "AACTATGCACAGAAGCTCCAGGGCAGAGTCACCATGACCACAGACACATCCACGAGCACAGCCTACATGGAGCTGAGGAGCCTGAGA" + "TCTGACGACACGGCCGTGTATTACTGTGCGAGAGA") + + vd_ins + + "GGTACAACTGGAACGAC" + + dj_ins + + "GCTGAATACTTCCAGCACTGGGGCCAGGGCACCCTGGTCACCGTCTCCTCAG"); + ASSERT_EQ(correct_root_seq, core::dna5String_to_string(root.Sequence())); + } +} + +TEST_F(IgSimulatorTest, VJMetarootSequenceCorrect) { + { + std::string vj_ins("ACCGT"); + VJMetaroot root(&v_db, &j_db, + 0, 0, + annotation_utils::CDRLabeling(), + 5, 3, vj_ins); + std::string correct_root_seq( + std::string( + "CAGGTTCAGCTGGTGCAGTCTGGAGCTGAGGTGAAGAAGCCTGGGGCCTCAGTGAAGGTCTCCTGCAAGGCTTCTGGTTACACCTTT" + "ACCAGCTATGGTATCAGCTGGGTGCGACAGGCCCCTGGACAAGGGCTTGAGTGGATGGGATGGATCAGCGCTTACAATGGTAACACA" + 
"AACTATGCACAGAAGCTCCAGGGCAGAGTCACCATGACCACAGACACATCCACGAGCACAGCCTACATGGAGCTGAGGAGCCTGAGA" + "TCTGACGACACGGCCGTGTATTACTGTGCG") + + vj_ins + + "GAATACTTCCAGCACTGGGGCCAGGGCACCCTGGTCACCGTCTCCTCAG"); + ASSERT_EQ(correct_root_seq, core::dna5String_to_string(root.Sequence())); + } +} + +//TEST_F(IgSimulatorTest, MetarootCreaterSpeedTest) { +// { +// config.algorithm_params.germline_params.loci = "IGH"; +// +// germline_utils::GermlineDbGenerator db_generator(config.io_params.input_params.germline_input, +// config.algorithm_params.germline_params); +// v_db = db_generator.GenerateVariableDb(); +// d_db = db_generator.GenerateDiversityDb(); +// j_db = db_generator.GenerateJoinDb(); +// +// VDJMetarootCreator metaroot_creator(config.simulation_params.base_repertoire_params.metaroot_simulation_params, +// &v_db, &d_db, &j_db); +// +// auto t1 = std::chrono::high_resolution_clock::now(); +// size_t N((int) 1e5); +// for (size_t i = 0; i < N; ++i) { +// auto root = metaroot_creator.Createroot(); +// } +// auto t2 = std::chrono::high_resolution_clock::now(); +// std::chrono::duration fp = t2 - t1; +// std::cout << "Simulation of " << N << " VDJ metaroots took " << fp.count() << "ms" << std::endl; +// } +// +// { +// config.algorithm_params.germline_params.loci = "IGL"; +// +// germline_utils::GermlineDbGenerator db_generator(config.io_params.input_params.germline_input, +// config.algorithm_params.germline_params); +// v_db = db_generator.GenerateVariableDb(); +// INFO("Generation of DB for join segments..."); +// j_db = db_generator.GenerateJoinDb(); +// +// VDJMetarootCreator metaroot_creator(config.simulation_params.base_repertoire_params.metaroot_simulation_params, +// &v_db, &d_db, &j_db); +// +// auto t1 = std::chrono::high_resolution_clock::now(); +// size_t N((int) 1e5); +// for (size_t i = 0; i < N; ++i) { +// metaroot_creator.Createroot()->Sequence(); +// } +// auto t2 = std::chrono::high_resolution_clock::now(); +// std::chrono::duration fp = t2 - t1; +// std::cout 
<< "Simulation of " << N << " VJ metaroots took " << fp.count() << "ms" << std::endl; +// } +//} + +// TEST_F(IgSimulatorTest, MetarootCreaterCDRTest) { +// { +// config.algorithm_params.germline_params.loci = "IGH"; +// +// germline_utils::GermlineDbGenerator db_generator(config.io_params.input_params.germline_input, +// config.algorithm_params.germline_params); +// v_db = db_generator.GenerateVariableDb(); +// d_db = db_generator.GenerateDiversityDb(); +// j_db = db_generator.GenerateJoinDb(); +// +// std::vector db; +// db.emplace_back(std::move(v_db)); +// db.emplace_back(std::move(d_db)); +// db.emplace_back(std::move(j_db)); +// +// VDJMetarootCreator metaroot_creator(config.simulation_params.base_repertoire_params.metaroot_simulation_params, +// db); +// +// MTSingleton::SetSeed(7); +// auto root = metaroot_creator.Createroot(); +// INFO(*root); +// INFO(root->Sequence()); +// ASSERT_EQ(root->CDRLabeling().cdr1.start_pos, 75); +// ASSERT_EQ(root->CDRLabeling().cdr1.end_pos, 98); +// ASSERT_EQ(root->CDRLabeling().cdr2.start_pos, 150); +// ASSERT_EQ(root->CDRLabeling().cdr2.end_pos, 173); +// ASSERT_EQ(root->CDRLabeling().cdr3.start_pos, 288); +// ASSERT_EQ(root->CDRLabeling().cdr3.end_pos, 366); +// } +// } + +TEST_F(IgSimulatorTest, ProductiveChecker) { + { + config.germline_params.loci = "IGH"; + + germline_utils::GermlineDbGenerator db_generator(config.io_params.input_params.germline_input, + config.germline_params); + v_db = db_generator.GenerateVariableDb(); + d_db = db_generator.GenerateDiversityDb(); + j_db = db_generator.GenerateJoinDb(); + + std::vector db; + db.emplace_back(std::move(v_db)); + db.emplace_back(std::move(d_db)); + db.emplace_back(std::move(j_db)); + + ProductivityChecker productivity_checker(std::unique_ptr + (new annotation_utils::SimpleAACalculator)); + + VDJMetarootCreator metaroot_creator(config.simulation_params.base_repertoire_params.metaroot_simulation_params, + db); + + auto t1 = std::chrono::high_resolution_clock::now(); + 
size_t N((int) 1e4); + size_t prod = 0; + for (size_t i = 0; i < N; ++i) { + auto root = metaroot_creator.Createroot(); + if (productivity_checker.IsProductive(root)) { + prod++; + } + } + std::cout << prod << " / " << N << std::endl; + auto t2 = std::chrono::high_resolution_clock::now(); + std::chrono::duration fp = t2 - t1; + std::cout << "Simulation of " << N << " VDJ metaroots took " << fp.count() << "ms" << std::endl; + } +} + +} // End namespace ig_simulator diff --git a/src/test/test_vj_finder.cpp b/src/test/test_vj_finder.cpp index 739cd727..3c830493 100644 --- a/src/test/test_vj_finder.cpp +++ b/src/test/test_vj_finder.cpp @@ -4,7 +4,7 @@ #include #include -#include +#include #include #include @@ -105,8 +105,8 @@ TEST_F(VJFinderTest, BaseVJFinderTest) { vj_finder_config.algorithm_params.fix_crop_fill_params.fill_right = true; vj_finder_config.algorithm_params.fix_crop_fill_params.fix_right = 3; read_archive.ExtractFromFile("test_dataset/vj_finder_test.fastq"); - vj_finder::GermlineDbGenerator db_generator(vj_finder_config.io_params.input_params.germline_input, - vj_finder_config.algorithm_params.germline_params); + germline_utils::GermlineDbGenerator db_generator(vj_finder_config.io_params.input_params.germline_input, + vj_finder_config.algorithm_params.germline_params); auto v_gene_database = db_generator.GenerateVariableDb(); auto j_gene_database = db_generator.GenerateJoinDb(); vj_finder::VJParallelProcessor processor(read_archive, diff --git a/src/umi_experiments/CMakeLists.txt b/src/umi_experiments/CMakeLists.txt index c5daee39..7041899e 100644 --- a/src/umi_experiments/CMakeLists.txt +++ b/src/umi_experiments/CMakeLists.txt @@ -16,20 +16,20 @@ file(GLOB HEADER_FILES **/*.hpp) #set(CMAKE_BUILD_TYPE "Debug") -add_executable(check_graph_symmetry tools/check_graph_symmetry.cpp utils.cpp) -add_executable(umi_to_fastq tools/umi_to_fastq.cpp utils.cpp umi_utils.cpp) +add_executable(check_graph_symmetry tools/check_graph_symmetry.cpp ig_simulator_utils.cpp) 
+add_executable(umi_to_fastq tools/umi_to_fastq.cpp ig_simulator_utils.cpp umi_utils.cpp) add_executable(simulate_tiny_dataset tools/simulate_tiny_dataset.cpp) -add_executable(umi_graph tools/umi_graph.cpp utils.cpp umi_utils.cpp) -add_executable(analyze_intermed_clusters tools/analyze_intermed_clusters.cpp utils.cpp) -add_executable(find_bad_cluster tools/find_bad_cluster.cpp utils.cpp clusterer.cpp) +add_executable(umi_graph tools/umi_graph.cpp ig_simulator_utils.cpp umi_utils.cpp) +add_executable(analyze_intermed_clusters tools/analyze_intermed_clusters.cpp ig_simulator_utils.cpp) +add_executable(find_bad_cluster tools/find_bad_cluster.cpp ig_simulator_utils.cpp clusterer.cpp) add_executable(report_pcr_error_rate tools/report_pcr_error_rate.cpp tools/error_analyzer.cpp tools/error_analyzer.hpp umi_utils.cpp utils/io.cpp) -add_executable(reads_by_umi_stats stats/reads_by_umi_stats.cpp utils.cpp umi_utils.cpp stats/dist_distribution_stats.cpp) -add_executable(print_graph_decomposition_stats stats/print_graph_decomposition_stats.cpp utils.cpp) -add_executable(pairwise_dist_stats stats/pairwise_dist_stats.cpp utils/io.cpp clusterer.cpp utils.cpp) -add_executable(dists_inside_clusters stats/dists_inside_clusters.cpp utils/io.cpp clusterer.cpp utils.cpp) +add_executable(reads_by_umi_stats stats/reads_by_umi_stats.cpp ig_simulator_utils.cpp umi_utils.cpp stats/dist_distribution_stats.cpp) +add_executable(print_graph_decomposition_stats stats/print_graph_decomposition_stats.cpp ig_simulator_utils.cpp) +add_executable(pairwise_dist_stats stats/pairwise_dist_stats.cpp utils/io.cpp clusterer.cpp ig_simulator_utils.cpp) +add_executable(dists_inside_clusters stats/dists_inside_clusters.cpp utils/io.cpp clusterer.cpp ig_simulator_utils.cpp) add_executable(umi_correction_stats stats/umi_correction_stats.cpp umi_utils.cpp utils/io.cpp) -add_executable(umi_naive naive/umi_naive.cpp ${HEADER_FILES} utils.cpp umi_utils.cpp clusterer.cpp utils/io.cpp) +add_executable(umi_naive 
naive/umi_naive.cpp ${HEADER_FILES} ig_simulator_utils.cpp umi_utils.cpp clusterer.cpp utils/io.cpp) add_executable(report_umi_abundance report_umi_abundance.cpp utils/io.cpp) -add_executable(cluster_reads cluster_reads.cpp ${HEADER_FILES} utils.cpp umi_utils.cpp clusterer.cpp utils/io.cpp ../fast_ig_tools/fast_ig_tools.cpp) +add_executable(cluster_reads cluster_reads.cpp ${HEADER_FILES} ig_simulator_utils.cpp umi_utils.cpp clusterer.cpp utils/io.cpp ../fast_ig_tools/fast_ig_tools.cpp) diff --git a/src/umi_experiments/utils.cpp b/src/umi_experiments/ig_simulator_utils.cpp similarity index 100% rename from src/umi_experiments/utils.cpp rename to src/umi_experiments/ig_simulator_utils.cpp diff --git a/src/vdj_utils/CMakeLists.txt b/src/vdj_utils/CMakeLists.txt index 75bcd355..fbf146b9 100644 --- a/src/vdj_utils/CMakeLists.txt +++ b/src/vdj_utils/CMakeLists.txt @@ -8,6 +8,7 @@ add_library(vdj_utils STATIC germline_utils/lymphocyte_type.cpp germline_utils/chain_type.cpp germline_utils/germline_gene_type.cpp + germline_utils/germline_db_generator.cpp germline_utils/germline_databases/immune_gene_database.cpp germline_utils/germline_databases/chain_database.cpp germline_utils/germline_databases/custom_gene_database.cpp @@ -21,6 +22,6 @@ add_library(vdj_utils STATIC annotation_utils/annotated_clone.cpp annotation_utils/annotated_clone_calculator.cpp annotation_utils/annotated_clone_set.cpp - ) + germline_utils/germline_config.cpp) target_link_libraries(vdj_utils core ${COMMON_LIBRARIES}) diff --git a/src/vdj_utils/annotation_utils/aa_annotation/aa_calculator.cpp b/src/vdj_utils/annotation_utils/aa_annotation/aa_calculator.cpp index 0a4dc635..08dd6b39 100644 --- a/src/vdj_utils/annotation_utils/aa_annotation/aa_calculator.cpp +++ b/src/vdj_utils/annotation_utils/aa_annotation/aa_calculator.cpp @@ -3,14 +3,14 @@ #include "aa_calculator.hpp" namespace annotation_utils { - bool SimpleAACalculator::ComputeInFrame(const CDRLabeling &cdr_labeling) { + bool 
SimpleAACalculator::ComputeInFrame(const CDRLabeling &cdr_labeling) const { CDRRange end_region = (cdr_labeling.cdr3.Valid()) ? cdr_labeling.cdr3 : cdr_labeling.cdr2; VERIFY_MSG(end_region.Valid() and cdr_labeling.cdr1.Valid(), "CDRs regions are not defined, ORF cannot be identified"); return (end_region.end_pos - cdr_labeling.cdr1.start_pos + 1) % 3 == 0; } - bool SimpleAACalculator::FindStopCodon(const AAString &aa_str) { + bool SimpleAACalculator::FindStopCodon(const AAString &aa_str) const { bool has_stop_codon = false; for(size_t i = 0; i < seqan::length(aa_str); i++) if(aa_str[i] == '*') { @@ -21,7 +21,7 @@ namespace annotation_utils { } AminoAcidAnnotation SimpleAACalculator::ComputeAminoAcidAnnotation(const core::Read &read, - const CDRLabeling &cdr_labeling) { + const CDRLabeling &cdr_labeling) const { VERIFY_MSG(cdr_labeling.cdr1.Valid(), "CDR1 is not defined, AA sequence cannot be computed"); using namespace seqan; StringSet, Owner > > aa_seqs; diff --git a/src/vdj_utils/annotation_utils/aa_annotation/aa_calculator.hpp b/src/vdj_utils/annotation_utils/aa_annotation/aa_calculator.hpp index 0b5d2ba0..6f0f6bd9 100644 --- a/src/vdj_utils/annotation_utils/aa_annotation/aa_calculator.hpp +++ b/src/vdj_utils/annotation_utils/aa_annotation/aa_calculator.hpp @@ -8,19 +8,21 @@ namespace annotation_utils { class BaseAACalculator { public: virtual AminoAcidAnnotation ComputeAminoAcidAnnotation(const core::Read& read, - const CDRLabeling &cdr_labeling) = 0; + const CDRLabeling &cdr_labeling) const = 0; virtual ~BaseAACalculator() { } }; class SimpleAACalculator : public BaseAACalculator { private: - bool ComputeInFrame(const CDRLabeling &cdr_labeling); + bool ComputeInFrame(const CDRLabeling &cdr_labeling) const; - bool FindStopCodon(const AAString &aa_str); + bool FindStopCodon(const AAString &aa_str) const; public: AminoAcidAnnotation ComputeAminoAcidAnnotation(const core::Read& read, - const CDRLabeling &cdr_labeling); + const CDRLabeling &cdr_labeling) const 
override; }; + + using BaseAACalculatorPtr = std::unique_ptr; } \ No newline at end of file diff --git a/src/vdj_utils/germline_utils/germline_config.cpp b/src/vdj_utils/germline_utils/germline_config.cpp new file mode 100644 index 00000000..5b0404b5 --- /dev/null +++ b/src/vdj_utils/germline_utils/germline_config.cpp @@ -0,0 +1,28 @@ +// +// Created by Andrew Bzikadze on 3/16/17. +// + +#include "germline_config.hpp" +#include +#include +#include + +namespace germline_utils { + +void load(GermlineInput &gi, boost::property_tree::ptree const &pt, bool) { + using config_common::load; + load(gi.germline_filenames_config, pt, "germline_filenames_config"); + load(gi.ig_dir, pt, "ig_dir"); + load(gi.tcr_dir, pt, "tcr_dir"); +} + +void load(GermlineParams &gp, boost::property_tree::ptree const &pt, bool) { + using config_common::load; + load(gp.germline_dir, pt, "germline_dir"); + load(gp.loci, pt, "loci"); + load(gp.organism, pt, "organism"); + load(gp.pseudogenes, pt, "pseudogenes"); +} + + +} // End namespace germline_utils diff --git a/src/vdj_utils/germline_utils/germline_config.hpp b/src/vdj_utils/germline_utils/germline_config.hpp new file mode 100644 index 00000000..c17c14ab --- /dev/null +++ b/src/vdj_utils/germline_utils/germline_config.hpp @@ -0,0 +1,29 @@ +// +// Created by Andrew Bzikadze on 3/16/17. 
+// + +#pragma once +#include "io/library.hpp" +#include "config_singl.hpp" +#include + +namespace germline_utils { + +struct GermlineInput { + std::string ig_dir; + std::string tcr_dir; + std::string germline_filenames_config; +}; + +struct GermlineParams { + std::string germline_dir; + std::string organism; + std::string loci; + bool pseudogenes; +}; + + +void load(GermlineInput &gi, boost::property_tree::ptree const &pt, bool); +void load(GermlineParams &gp, boost::property_tree::ptree const &pt, bool); + +} // End namespace germline_utils diff --git a/src/vj_finder/germline_db_generator.cpp b/src/vdj_utils/germline_utils/germline_db_generator.cpp similarity index 66% rename from src/vj_finder/germline_db_generator.cpp rename to src/vdj_utils/germline_utils/germline_db_generator.cpp index 545ea5cb..195cf1ed 100644 --- a/src/vj_finder/germline_db_generator.cpp +++ b/src/vdj_utils/germline_utils/germline_db_generator.cpp @@ -2,72 +2,67 @@ #include "germline_db_generator.hpp" -#include - -namespace vj_finder { - class LociParam { - public: - static bool LociIncludeIg(std::string loci) { - if(loci.size() < 2) - return false; - return loci == "all" or loci.substr(0, 2) == "IG"; - } +namespace germline_utils { + bool LociParam::LociIncludeIg(std::string loci) { + if(loci.size() < 2) + return false; + return loci == "all" or loci.substr(0, 2) == "IG"; + } - static bool LociIncludeTr(std::string loci) { - if(loci.size() < 2) - return false; - return loci == "all" or loci.substr(0, 2) == "TR"; - } + bool LociParam::LociIncludeTr(std::string loci) { + if(loci.size() < 2) + return false; + return loci == "all" or loci.substr(0, 2) == "TR"; + } - static bool LociIncludeIgh(std::string loci) { - return loci == "all" or loci == "IGH" or loci == "IG"; - } + bool LociParam::LociIncludeIgh(std::string loci) { + return loci == "all" or loci == "IGH" or loci == "IG"; + } - static bool LociIncludeIgk(std::string loci) { - return loci == "all" or loci == "IGK" or loci == "IG"; - } + 
bool LociParam::LociIncludeIgk(std::string loci) { + return loci == "all" or loci == "IGK" or loci == "IG"; + } - static bool LociIncludeIgl(std::string loci) { - return loci == "all" or loci == "IGL" or loci == "IG"; - } + bool LociParam::LociIncludeIgl(std::string loci) { + return loci == "all" or loci == "IGL" or loci == "IG"; + } - static bool LociIncludeTra(std::string loci) { - return loci == "all" or loci == "TRA" or loci == "TR"; - } + bool LociParam::LociIncludeTra(std::string loci) { + return loci == "all" or loci == "TRA" or loci == "TR"; + } - static bool LociIncludeTrb(std::string loci) { - return loci == "all" or loci == "TRB" or loci == "TR"; - } + bool LociParam::LociIncludeTrb(std::string loci) { + return loci == "all" or loci == "TRB" or loci == "TR"; + } - static bool LociIncludeTrg(std::string loci) { - return loci == "all" or loci == "TRG" or loci == "TR"; - } + bool LociParam::LociIncludeTrg(std::string loci) { + return loci == "all" or loci == "TRG" or loci == "TR"; + } - static bool LociIncludeTrd(std::string loci) { - return loci == "all" or loci == "TRD" or loci == "TR"; - } + bool LociParam::LociIncludeTrd(std::string loci) { + return loci == "all" or loci == "TRD" or loci == "TR"; + } - static std::vector ConvertIntoChainTypes(std::string loci) { - std::vector chain_types; - if(loci.size() < 2) - return chain_types; - if(LociIncludeIgh(loci)) - chain_types.push_back(germline_utils::ChainType("IGH")); - if(LociIncludeIgk(loci)) - chain_types.push_back(germline_utils::ChainType("IGK")); - if(LociIncludeIgl(loci)) - chain_types.push_back(germline_utils::ChainType("IGL")); - if(LociIncludeTra(loci)) - chain_types.push_back(germline_utils::ChainType("TRA")); - if(LociIncludeTrb(loci)) - chain_types.push_back(germline_utils::ChainType("TRB")); - if(LociIncludeTrg(loci)) - chain_types.push_back(germline_utils::ChainType("TRG")); - if(LociIncludeTrd(loci)) - chain_types.push_back(germline_utils::ChainType("TRD")); + std::vector 
LociParam::ConvertIntoChainTypes(std::string loci) { + std::vector chain_types; + if (loci.size() < 2) return chain_types; - } - }; + if (LociIncludeIgh(loci)) + chain_types.push_back(germline_utils::ChainType("IGH")); + if (LociIncludeIgk(loci)) + chain_types.push_back(germline_utils::ChainType("IGK")); + if (LociIncludeIgl(loci)) + chain_types.push_back(germline_utils::ChainType("IGL")); + if (LociIncludeTra(loci)) + chain_types.push_back(germline_utils::ChainType("TRA")); + if (LociIncludeTrb(loci)) + chain_types.push_back(germline_utils::ChainType("TRB")); + if (LociIncludeTrg(loci)) + chain_types.push_back(germline_utils::ChainType("TRG")); + if (LociIncludeTrd(loci)) + chain_types.push_back(germline_utils::ChainType("TRD")); + return chain_types; + } class GermlineFilesConfig { struct ExtendedImmuneGeneType { @@ -139,10 +134,10 @@ namespace vj_finder { }; class ChainDirectoryParam { - const VJFinderConfig::IOParams::InputParams::GermlineInput &gi_; + const germline_utils::GermlineInput &gi_; public: - ChainDirectoryParam(const VJFinderConfig::IOParams::InputParams::GermlineInput &gi) : + ChainDirectoryParam(const germline_utils::GermlineInput &gi) : gi_(gi) { } std::string GetDirByChainType(germline_utils::ChainType chain_type) { @@ -168,6 +163,11 @@ namespace vj_finder { germline_files_config.GetFilenameByImmuneGeneType( ImmuneGeneType(*it, SegmentType::VariableSegment), germ_params_.pseudogenes))); + if (it->IsVDJ()) + d_genes_fnames_.push_back(path::append_path(lymph_dir, + germline_files_config.GetFilenameByImmuneGeneType( + ImmuneGeneType(*it, SegmentType::DiversitySegment), + germ_params_.pseudogenes))); j_genes_fnames_.push_back(path::append_path(lymph_dir, germline_files_config.GetFilenameByImmuneGeneType( ImmuneGeneType(*it, SegmentType::JoinSegment), @@ -176,6 +176,11 @@ namespace vj_finder { INFO(v_genes_fnames_.size() << " V gene segment files will be used for DB: "); for(size_t i = 0; i < v_genes_fnames_.size(); i++) INFO(chain_types_[i] << ": " 
<< v_genes_fnames_[i]); + + INFO(d_genes_fnames_.size() << " D gene segment files will be used for DB: "); + for(size_t i = 0; i < d_genes_fnames_.size(); i++) + INFO(chain_types_[i] << ": " << d_genes_fnames_[i]); + INFO(j_genes_fnames_.size() << " J gene segment files will be used for DB: "); for(size_t i = 0; i < j_genes_fnames_.size(); i++) INFO(chain_types_[i] << ": " << j_genes_fnames_[i]); @@ -190,6 +195,15 @@ namespace vj_finder { return v_custom_db; } + germline_utils::CustomGeneDatabase GermlineDbGenerator::GenerateDiversityDb() { + germline_utils::CustomGeneDatabase d_custom_db(germline_utils::SegmentType::DiversitySegment); + for(size_t i = 0; i < d_genes_fnames_.size(); i++) + d_custom_db.AddDatabase(germline_utils::ImmuneGeneType(chain_types_[i], + germline_utils::SegmentType::DiversitySegment), + d_genes_fnames_[i]); + return d_custom_db; + } + germline_utils::CustomGeneDatabase GermlineDbGenerator::GenerateJoinDb() { germline_utils::CustomGeneDatabase j_custom_db(germline_utils::SegmentType::JoinSegment); for(size_t i = 0; i < j_genes_fnames_.size(); i++) diff --git a/src/vdj_utils/germline_utils/germline_db_generator.hpp b/src/vdj_utils/germline_utils/germline_db_generator.hpp new file mode 100644 index 00000000..8f4a42df --- /dev/null +++ b/src/vdj_utils/germline_utils/germline_db_generator.hpp @@ -0,0 +1,47 @@ +#pragma once + +#include "germline_utils/germline_config.hpp" +#include + +namespace germline_utils { + class GermlineDbGenerator { + const germline_utils::GermlineInput &germ_input_; + const germline_utils::GermlineParams &germ_params_; + + std::vector chain_types_; + std::vector v_genes_fnames_; + std::vector d_genes_fnames_; + std::vector j_genes_fnames_; + + void GenerateGeneFnames(); + + public: + GermlineDbGenerator(const germline_utils::GermlineInput &germ_input, + const germline_utils::GermlineParams &germ_params) : + germ_input_(germ_input), + germ_params_(germ_params) { + GenerateGeneFnames(); + } + + 
germline_utils::CustomGeneDatabase GenerateVariableDb(); + + germline_utils::CustomGeneDatabase GenerateDiversityDb(); + + germline_utils::CustomGeneDatabase GenerateJoinDb(); + }; + + class LociParam { + public: + static bool LociIncludeIg(std::string loci); + static bool LociIncludeTr(std::string loci); + static bool LociIncludeIgh(std::string loci); + static bool LociIncludeIgk(std::string loci); + static bool LociIncludeIgl(std::string loci); + static bool LociIncludeTra(std::string loci); + static bool LociIncludeTrb(std::string loci); + static bool LociIncludeTrg(std::string loci); + static bool LociIncludeTrd(std::string loci); + + static std::vector ConvertIntoChainTypes(std::string loci); + }; +} \ No newline at end of file diff --git a/src/vj_finder/CMakeLists.txt b/src/vj_finder/CMakeLists.txt index daead5da..d3937232 100644 --- a/src/vj_finder/CMakeLists.txt +++ b/src/vj_finder/CMakeLists.txt @@ -18,7 +18,6 @@ add_definitions(-DSEQAN_HAS_BZIP2=1) add_library(vj_finder_library STATIC vj_finder_config.cpp command_line_routines.cpp - germline_db_generator.cpp vj_alignment_structs.cpp vj_query_aligner.cpp vj_hits_filter.cpp diff --git a/src/vj_finder/germline_db_generator.hpp b/src/vj_finder/germline_db_generator.hpp deleted file mode 100644 index c47e65dd..00000000 --- a/src/vj_finder/germline_db_generator.hpp +++ /dev/null @@ -1,29 +0,0 @@ -#pragma once - -#include "vj_finder_config.hpp" -#include - -namespace vj_finder { - class GermlineDbGenerator { - const VJFinderConfig::IOParams::InputParams::GermlineInput &germ_input_; - const VJFinderConfig::AlgorithmParams::GermlineParams &germ_params_; - - std::vector chain_types_; - std::vector v_genes_fnames_; - std::vector j_genes_fnames_; - - void GenerateGeneFnames(); - - public: - GermlineDbGenerator(const VJFinderConfig::IOParams::InputParams::GermlineInput &germ_input, - const VJFinderConfig::AlgorithmParams::GermlineParams &germ_params) : - germ_input_(germ_input), - germ_params_(germ_params) { - 
GenerateGeneFnames(); - } - - germline_utils::CustomGeneDatabase GenerateVariableDb(); - - germline_utils::CustomGeneDatabase GenerateJoinDb(); - }; -} \ No newline at end of file diff --git a/src/vj_finder/vj_finder_config.cpp b/src/vj_finder/vj_finder_config.cpp index 7047323f..37b5a747 100644 --- a/src/vj_finder/vj_finder_config.cpp +++ b/src/vj_finder/vj_finder_config.cpp @@ -3,6 +3,7 @@ #include #include #include +#include "germline_utils/germline_config.hpp" namespace vj_finder { @@ -11,13 +12,6 @@ namespace vj_finder { load(rp.num_threads, pt, "num_threads"); } - void load(VJFinderConfig::IOParams::InputParams::GermlineInput &gi, boost::property_tree::ptree const &pt, bool) { - using config_common::load; - load(gi.germline_filenames_config, pt, "germline_filenames_config"); - load(gi.ig_dir, pt, "ig_dir"); - load(gi.tcr_dir, pt, "tcr_dir"); - } - void update_input_config(VJFinderConfig::IOParams::InputParams & ip) { ip.germline_input.germline_filenames_config = path::append_path(ip.config_dir, ip.germline_input.germline_filenames_config); @@ -85,14 +79,6 @@ namespace vj_finder { load(ap.fix_strand, pt, "fix_strand"); } - void load(VJFinderConfig::AlgorithmParams::GermlineParams &gp, boost::property_tree::ptree const &pt, bool) { - using config_common::load; - load(gp.germline_dir, pt, "germline_dir"); - load(gp.loci, pt, "loci"); - load(gp.organism, pt, "organism"); - load(gp.pseudogenes, pt, "pseudogenes"); - } - void load(VJFinderConfig::AlgorithmParams::FilteringParams &fp, boost::property_tree::ptree const &pt, bool) { using config_common::load; load(fp.enable_filtering, pt, "enable_filtering"); diff --git a/src/vj_finder/vj_finder_config.hpp b/src/vj_finder/vj_finder_config.hpp index 14e01dc5..2187c46c 100644 --- a/src/vj_finder/vj_finder_config.hpp +++ b/src/vj_finder/vj_finder_config.hpp @@ -3,6 +3,7 @@ #include "io/library.hpp" #include "config_singl.hpp" #include +#include "germline_utils/germline_config.hpp" namespace vj_finder { struct 
VJFinderConfig { @@ -12,15 +13,9 @@ namespace vj_finder { struct IOParams { struct InputParams { - struct GermlineInput { - std::string ig_dir; - std::string tcr_dir; - std::string germline_filenames_config; - }; - std::string input_reads; std::string config_dir; - GermlineInput germline_input; + germline_utils::GermlineInput germline_input; }; struct OutputParams { @@ -69,13 +64,6 @@ namespace vj_finder { size_t min_aligned_length; }; - struct GermlineParams { - std::string germline_dir; - std::string organism; - std::string loci; - bool pseudogenes; - }; - struct FixCropFillParams { enum FixCropFillAlgorithm { UnknowmFCFAlgorithm, AggressiveFCFAlgorithm }; @@ -119,7 +107,7 @@ namespace vj_finder { }; AlignerParams aligner_params; - GermlineParams germline_params; + germline_utils::GermlineParams germline_params; FilteringParams filtering_params; FixCropFillParams fix_crop_fill_params; ScoringParams scoring_params; diff --git a/src/vj_finder/vjf_launch.cpp b/src/vj_finder/vjf_launch.cpp index 79d64f49..6511437d 100644 --- a/src/vj_finder/vjf_launch.cpp +++ b/src/vj_finder/vjf_launch.cpp @@ -3,9 +3,11 @@ #include "vjf_launch.hpp" #include -#include "germline_db_generator.hpp" +#include "germline_utils/germline_db_generator.hpp" #include "vj_parallel_processor.hpp" +using namespace germline_utils; + namespace vj_finder { void CreateAlignmentOutput(std::ofstream& fhandler, const core::Read& read, const VJHits& vj_hits) { fhandler << read.name << "\t" << vj_hits.GetVHitByIndex(0).Start() << "\t" <<