diff --git a/simulator/alisimulator.cpp b/simulator/alisimulator.cpp index 1c354a12e..99b4a5ec5 100644 --- a/simulator/alisimulator.cpp +++ b/simulator/alisimulator.cpp @@ -1035,6 +1035,10 @@ void AliSimulator::executeIM(int thread_id, int &sequence_length, int default_se default_random_engine generator; generator.seed(params->ran_seed + MPIHelper::getInstance().getProcessID() * 1000 + params->alignment_id); + // Bug fix: in some cases the ids of leaves are not continuous -> in IM algorithm with multiple threads, we use the leaf id to jump to the current position to output the simulated sequences -> we need to build a vector of continuous ids + if (num_threads > 1) + buildContinousIdsForTree(); + // init the output stream initOutputFile(out, thread_id, actual_segment_length, output_filepath, open_mode, write_sequences_to_tmp_data); @@ -1942,7 +1946,7 @@ void AliSimulator::outputOneSequence(Node* node, string &output, int thread_id, // cache output into the writing queue if (num_threads != 1) { - int64_t pos = ((int64_t)node->id) * ((int64_t)output_line_length); + int64_t pos = ((int64_t)node_continuous_id[node->id]) * ((int64_t)output_line_length); pos += starting_pos + (num_sites_per_state == 1 ? segment_start : (segment_start * num_sites_per_state)) + (thread_id == 0 ? 0 : seq_name_length); cacheSeqChunkStr(pos, output, thread_id); } @@ -3760,3 +3764,60 @@ void AliSimulator::mergeChunksAllNodes(Node* node, Node* dad) mergeChunksAllNodes((*it)->node, node); } } + + +void AliSimulator::buildContinousIdsForLeave(Node* node, Node* dad) +{ + // start from root + if (!node) { + node = tree->root; + tree->leafNum = 0; + } + + // reset the leaf's id + if (node->isLeaf() && node->name != ROOT_NAME) + node_continuous_id[node->id] = tree->leafNum++; + + // traverse further + FOR_NEIGHBOR_IT(node, dad, it) { + buildContinousIdsForLeave((*it)->node, node); + } +} + +void AliSimulator::buildContinousIdsForInternals(Node* node, Node* dad) +{ + // start from root + if (!node) { + node = tree->root; + tree->nodeNum = tree->leafNum; + } + + // reset the id of internal nodes + if (!node->isLeaf()) + node_continuous_id[node->id] = tree->nodeNum++; + + // traverse further + FOR_NEIGHBOR_IT(node, dad, it) { + buildContinousIdsForInternals((*it)->node, node); + } +} + +void AliSimulator::buildContinousIdsForTree() +{ + // backup the number of leaves and nodes + int leafNum = tree->leafNum; + int nodeNum = tree->nodeNum; + + // init the array to store the continuous ids + node_continuous_id.resize(nodeNum + 1); + + // build the continuous ids for leaves + buildContinousIdsForLeave(); + + // build the continuous ids for internal nodes + buildContinousIdsForInternals(); + + // restore the number of leaves and nodes + tree->leafNum = leafNum; + tree->nodeNum = nodeNum; +} diff --git a/simulator/alisimulator.h b/simulator/alisimulator.h index d08e89e9a..0441b6eed 100644 --- a/simulator/alisimulator.h +++ b/simulator/alisimulator.h @@ -363,6 +363,21 @@ class AliSimulator{ */ void mergeChunksAllNodes(Node* node = NULL, Node* dad = NULL); + /** + build a contious ids for leaves in the tree (in some cases, the ids of leaves are not continuous) + */ + void buildContinousIdsForLeave(Node* node = NULL, Node* dad = NULL); + + /** + build a contious ids for internal nodes in the tree (in some cases, the ids of leaves are not continuous) + */ + void buildContinousIdsForInternals(Node* node = NULL, Node* dad = NULL); + + /** + build a contious ids for all nodes in the tree (in some cases, the ids of leaves are not continuous) + */ + void buildContinousIdsForTree(); + /** init the output file */ @@ -461,6 +476,7 @@ class AliSimulator{ vector cache_start_indexes; int cache_size_per_thread; bool force_output_PHYLIP = false; + vector node_continuous_id; // variables using for posterior mean rates/state frequencies bool applyPosRateHeterogeneity = false;