Skip to content

Commit

Permalink
Merge pull request #127 from yana-safonova/ig_simulator
Browse files Browse the repository at this point in the history
IgSimulator 2.0.alpha
  • Loading branch information
eodus authored Apr 28, 2017
2 parents e8c28da + 8f2fbd6 commit 6e26045
Show file tree
Hide file tree
Showing 110 changed files with 5,578 additions and 147 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/py"
PATTERN "*.pyc" EXCLUDE
PATTERN ".gitignore" EXCLUDE)

set(tools "igrec.py;mass_spectra_analyzer.py;dense_subgraph_finder.py;igquast.py;barcoded_igrec.py;diversity_analyzer.py")
set(tools "igrec.py;mass_spectra_analyzer.py;dense_subgraph_finder.py;igquast.py;barcoded_igrec.py;diversity_analyzer.py;ig_simulator.py")
foreach(tool ${tools})
install(PROGRAMS ${tool}
DESTINATION "${INSTALL_DIR}"
Expand Down
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,15 @@ cdr: cmake
umi: cmake
$(MAKE) -C build/release umi_correction_stats umi_graph umi_naive umi_to_fastq

igs: cmake
$(MAKE) -C build/release ig_simulator

clean:
-rm -r build

clean_tests:
-rm *.pyc
-rm -r igrec_test
-rm -r ms_analyzer_test
-rm -r ig_simulator_test
-rm *~
107 changes: 107 additions & 0 deletions configs/ig_simulator/config.info
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
io_params {
input_params {
germline_input {
ig_dir IG_antevolo
tcr_dir TCR
germline_filenames_config ./configs/vj_finder/germline_files_config.txt
}
cdr_labeler_config_filename ./configs/cdr_labeler/config.info
}

output_params {
output_dir ig_simulator_test
log_filename log.properties
base_repertoire_filename base_repertoire.fasta
base_repertoire_info base_repertoire.info
filtered_pool filtered_pool.fasta
full_pool full_pool.fasta
trees_dir trees_dir
}
}

germline_params {
organism human
loci IGH
pseudogenes false
germline_dir ./data/germline
}

simulation_params {
base_repertoire_params {
metaroot_simulation_params {
gene_chooser_params {
gene_chooser_method uniform
}

nucleotides_remover_params {
nucleotides_remover_method uniform
uniform_remover_params {
max_remove_v_gene 20
max_remove_d_gene_left 5
max_remove_d_gene_right 5
max_remove_j_gene 10
}
}

p_nucleotides_creator_params {
p_nucleotides_creator_method uniform
uniform_creator_params {
max_create_v_gene 5
max_create_d_gene_left 3
max_create_d_gene_right 3
max_create_j_gene 3
}
}

n_nucleotides_inserter_params {
n_nucleotides_method uniform
uniform_inserter_params {
max_vj_insertion 10
max_vd_insertion 21
max_dj_insertion 23
}
}

cleavage_params {
prob_cleavage_v 0.5
prob_cleavage_d_left 0.5
prob_cleavage_d_right 0.5
prob_cleavage_j 0.5
}
}

multiplicity_creator_params {
multiplicity_method geometric
geometric_params {
lambda 0.1
}
}

productive_params {
productive_part 1
}

number_of_metaroots 10
}

clonal_tree_simulator_params {
tree_size_generator_params {
tree_size_generator_method geometric
geometric_params {
lambda 0.001
}
}

shm_creator_params {
shm_creator_method poisson
poisson_params {
lambda 2
}
}

pool_manager_strategy wide; uniform, wide, deep
prob_ret_to_pool 0.9
lambda_distr_n_children 0.3

}
}
107 changes: 107 additions & 0 deletions configs/ig_simulator/config.info.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
io_params {
input_params {
germline_input {
ig_dir IG_antevolo
tcr_dir TCR
germline_filenames_config ./configs/vj_finder/germline_files_config.txt
}
cdr_labeler_config_filename ./configs/cdr_labeler/config.info
}

output_params {
output_dir ig_simulator_test
log_filename log.properties
base_repertoire_filename base_repertoire.fasta
base_repertoire_info base_repertoire.info
filtered_pool filtered_pool.fasta
full_pool full_pool.fasta
trees_dir trees_dir
}
}

germline_params {
organism human
loci IGH
pseudogenes false
germline_dir ./data/germline
}

simulation_params {
base_repertoire_params {
metaroot_simulation_params {
gene_chooser_params {
gene_chooser_method uniform
}

nucleotides_remover_params {
nucleotides_remover_method uniform
uniform_remover_params {
max_remove_v_gene 20
max_remove_d_gene_left 5
max_remove_d_gene_right 5
max_remove_j_gene 10
}
}

p_nucleotides_creator_params {
p_nucleotides_creator_method uniform
uniform_creator_params {
max_create_v_gene 5
max_create_d_gene_left 3
max_create_d_gene_right 3
max_create_j_gene 3
}
}

n_nucleotides_inserter_params {
n_nucleotides_method uniform
uniform_inserter_params {
max_vj_insertion 10
max_vd_insertion 21
max_dj_insertion 23
}
}

cleavage_params {
prob_cleavage_v 0.5
prob_cleavage_d_left 0.5
prob_cleavage_d_right 0.5
prob_cleavage_j 0.5
}
}

multiplicity_creator_params {
multiplicity_method geometric
geometric_params {
lambda 0.1
}
}

productive_params {
productive_part 1
}

number_of_metaroots 10
}

clonal_tree_simulator_params {
tree_size_generator_params {
tree_size_generator_method geometric
geometric_params {
lambda 0.001
}
}

shm_creator_params {
shm_creator_method poisson
poisson_params {
lambda 2
}
}

pool_manager_strategy wide; uniform, wide, deep
prob_ret_to_pool 0.9
lambda_distr_n_children 0.3

}
}
54 changes: 54 additions & 0 deletions data/germline/human/IG_antevolo/IGHD.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
>IGHD1-1*01
ggtacaactggaacgac
>IGHD1-14*01
ggtataaccggaaccac
>IGHD1-20*01
ggtataactggaacgac
>IGHD1-26*01
ggtatagtgggagctactac
>IGHD1-7*01
ggtataactggaactac
>IGHD2-15*01
aggatattgtagtggtggtagctgctactcc
>IGHD2-2*01
aggatattgtagtagtaccagctgctatgcc
>IGHD2-21*01
agcatattgtggtggtgattgctattcc
>IGHD2-8*01
aggatattgtactaatggtgtatgctatacc
>IGHD3-10*01
gtattactatggttcggggagttattataac
>IGHD3-16*01
gtattatgattacgtttgggggagttatgcttatacc
>IGHD3-22*01
gtattactatgatagtagtggttattactac
>IGHD3-3*01
gtattacgatttttggagtggttattatacc
>IGHD3-9*01
gtattacgatattttgactggttattataac
>IGHD4-11*01
tgactacagtaactac
>IGHD4-17*01
tgactacggtgactac
>IGHD4-23*01
tgactacggtggtaactcc
>IGHD4-4*01
tgactacagtaactac
>IGHD5-12*01
gtggatatagtggctacgattac
>IGHD5-18*01
gtggatacagctatggttac
>IGHD5-24*01
gtagagatggctacaattac
>IGHD5-5*01
gtggatacagctatggttac
>IGHD6-13*01
gggtatagcagcagctggtac
>IGHD6-19*01
gggtatagcagtggctggtac
>IGHD6-25*01
gggtatagcagcggctac
>IGHD6-6*01
gagtatagcagctcgtcc
>IGHD7-27*01
ctaactgggga
12 changes: 12 additions & 0 deletions data/germline/human/IG_antevolo/IGHJ.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
>IGHJ1*01
gctgaatacttccagcactggggccagggcaccctggtcaccgtctcctcag
>IGHJ2*01
ctactggtacttcgatctctggggccgtggcaccctggtcactgtctcctcag
>IGHJ3*01
tgatgcttttgatgtctggggccaagggacaatggtcaccgtctcttcag
>IGHJ4*01
actactttgactactggggccaaggaaccctggtcaccgtctcctcag
>IGHJ5*01
acaactggttcgactcctggggccaaggaaccctggtcaccgtctcctcag
>IGHJ6*01
attactactactactacggtatggacgtctggggcaaagggaccacggtcaccgtctcctcag
Loading

0 comments on commit 6e26045

Please sign in to comment.