From 095ea2dd058f7fa6356d76cbd4296d55b9349642 Mon Sep 17 00:00:00 2001 From: sach Date: Fri, 25 Oct 2024 15:45:42 -0400 Subject: [PATCH] Formatted some of the documentation to be cleaner --- aphylogeo/alignement.py | 173 ++++++++++++++++-------------------- aphylogeo/multiProcessor.py | 21 +++-- aphylogeo/utils.py | 14 --- 3 files changed, 89 insertions(+), 119 deletions(-) diff --git a/aphylogeo/alignement.py b/aphylogeo/alignement.py index d83972ca..228cae64 100644 --- a/aphylogeo/alignement.py +++ b/aphylogeo/alignement.py @@ -315,11 +315,10 @@ def alignSequencesWithPairwise(self, centroidKey, centroidSeq): def muscleAlign(self): """Method to perform a multiple DNA sequence alignment using Muscle Algorithm - Return: - ------- - (Dict): heuristicMSA - Keys: accession ID - Values: Aligned sequences + Returns (Dict): + heuristicMSA + - Keys: accession ID + - Values: Aligned sequences """ try: if sys.platform == "win32": @@ -343,11 +342,10 @@ def muscleAlign(self): def clustalAlign(self): """Method to perform a multiple DNA sequence alignment using ClustalW2 Algorithm - Return: - ------- - (Dict): heuristicMSA - Keys: accession ID - Values: Aligned sequences + Returns (Dict): + heuristicMSA + - Keys: accession ID + - Values: Aligned sequences """ try: if sys.platform == "win32": @@ -369,11 +367,10 @@ def clustalAlign(self): def mafftAlign(self): """Method to perform a multiple DNA sequence alignment using MAFFT Algorithm - Return: - ------- - (Dict): heuristicMSA - Keys: accession ID - Values: Aligned sequences + Returns (Dict): + heuristicMSA + - Keys: accession ID + - Values: Aligned sequences """ try: if sys.platform == "win32": @@ -421,16 +418,14 @@ def narrowFitPairwise(self, aligned): The length of each sequence from the pairwise alignment are set equal by inserting dash (-) in most appropriate location of a given sequence. 
- Parameters: - ----------- - alignment: dict of nested dict - {accession couple #1 : {Centroid Acc:Centroid Aligned Seq, Non-centroid Acc #1: non-centroid Aligned Seq #1}, - ... , - {accession couple #n : {Centroid Acc:Centroid Aligned Seq, Non-centroid Acc #n: non-centroid Aligned Seq #n}} + args: + alignment: dict of nested dict + {accession couple #1 : {Centroid Acc:Centroid Aligned Seq, Non-centroid Acc #1: non-centroid Aligned Seq #1}, + ... , + {accession couple #n : {Centroid Acc:Centroid Aligned Seq, Non-centroid Acc #n: non-centroid Aligned Seq #n}} - Return: - ------- - A dictionary of all accessions and their fitted aligned sequences. + Returns: + A dictionary of all accessions and their fitted aligned sequences. """ seqs = self.getAlignSeqs(aligned) max_len = max(self.getAlignSeqLens(aligned)) @@ -444,13 +439,11 @@ def narrowFitPairwise(self, aligned): def getAlignSeqs(self, aligned): """Extract all sequences aligned using a pairwise alignment - Parameters: - ----------- - alignment: see fitPairwise(alignment) docstring + args: + alignment: see fitPairwise(alignment) docstring - Return: - ------- - List of sequences aligned through pairwise alignment + Returns: + List of sequences aligned through pairwise alignment """ seqs = [] for alignment in aligned: @@ -460,42 +453,36 @@ def getAlignSeqs(self, aligned): def getAlignSeqLens(self, aligned): """Get length of all sequences aligned using a pairwise alignment - Parameters: - ----------- - alignment: see fitPairwise(alignment) docstring + args: + alignment: see fitPairwise(alignment) docstring - Return: - ------- - List of the length of each aligned sequences + Returns: + List of the length of each aligned sequences """ return [len(seq) for seq in self.getAlignSeqs(aligned)] def getAlignCouple(self, aligned): """Get nested couple accessions and their respective sequences - Parameters: - ----------- - alignment: see fitPairwise(alignment) docstring + args: + alignment: see fitPairwise(alignment) docstring 
- Return: - ------- - List of paired accessions and their aligned sequences + Returns: + List of paired accessions and their aligned sequences """ return [val for val in list(aligned.values())] def extractOneAlignAcc(self, aligned, nest_ord=0): """Extract the accession from a nested alignment couple - Parameters: - ----------- - alignment: see fitPairwise(alignment) docstring - nest_ord, int, optional: - The position of the nested accessions (Default = 0 (centroid), 1 (aligned sequence)) + args: + alignment: see fitPairwise(alignment) docstring + nest_ord (int) optional: + The position of the nested accessions (Default = 0 (centroid), 1 (aligned sequence)) - Return: - ------- - The list of either centroid (nest_ord = 0 (Default)) or non-centroid (nest_ord = 1) - accessions of a group of sequences aligned throug pairwise alignment. + Returns: + The list of either centroid (nest_ord = 0 (Default)) or non-centroid (nest_ord = 1) + accessions of a group of sequences aligned through pairwise alignment. """ try: return [list(i)[nest_ord] for i in self.getAlignCouple(aligned)] @@ -507,15 +494,13 @@ def extractOneAlignAcc(self, aligned, nest_ord=0): def isCurrentCharDash(self, seqs, seq_i, ch_i): """Assess whether the character at current cursor position is a dash - Parameters: - ----------- - seqs, list: aligned sequences to fit - seq_i, int: index of the current sequence - ch_i, int: index of the currenct character + args: + seqs (list): aligned sequences to fit + seq_i (int): index of the current sequence + ch_i (int): index of the current character - Return: - ------- - True if the current character assessed is a dash, False otherwise + Returns: + True if the current character assessed is a dash, False otherwise """ try: return seqs[seq_i][ch_i] == "-" @@ -528,14 +513,12 @@ def insertDashToShorterSeq(self, seqs, ch_i, aligned): Insert a dash (-) character in a sequence if its length is shorter than the longest one in the group of aligned sequence.
- Parameters: - ----------- - seqs, list: aligned sequences to fit - seq_i, int: index of the current sequence + args: + seqs (list): aligned sequences to fit + seq_i (int): index of the current sequence - Return: - ------- - List, The fitted sequences of a pairwise alignment + Returns (List): + - The fitted sequences of a pairwise alignment """ for seq_j in range(0, len(seqs)): try: @@ -548,14 +531,12 @@ def insertDashToShorterSeq(self, seqs, ch_i, aligned): def mergeFitPairwise(self, aligned, seqs): """Generate a dictionary of all accessions and their fitted sequences - Parameters: - ----------- - alignment: see fitPairwise(alignment) docstring - seqs, list: aligned sequences to fit + args: + alignment: see fitPairwise(alignment) docstring + seqs (list): aligned sequences to fit - Return: - ------- - Dict, Group of accessions and their fitted sequences from a pairwise alignment + Returns (Dict): + Group of accessions and their fitted sequences from a pairwise alignment """ centroid = {list(set(self.extractOneAlignAcc(aligned)))[0]: seqs[0]} non_centroid = dict(zip(self.extractOneAlignAcc(aligned, 1), seqs[1::2])) @@ -564,14 +545,12 @@ def mergeFitPairwise(self, aligned, seqs): def appendDashToShorterSeqs(self, seqs, max_len): """Append dash to all sequences shorter than the longest one from a list of sequences - Parameters: - ----------- - seqs, list: List of fitted sequences post pairwise alignment - max_len int: Length of the longest aligned sequence, including the blank/dash + args: + seqs, list: List of fitted sequences post pairwise alignment + max_len int: Length of the longest aligned sequence, including the blank/dash - Return: - ------- - List of sequences with dash appended where applicable + Returns: + List of sequences with dash appended where applicable """ return [f"{str(seq):-<{max_len}}" for seq in seqs] @@ -582,7 +561,7 @@ def starAlignement(self, centroidKey, aligned): "-" is found in the seqA of a pair, but not another, it is inserted into 
every other ones. - ex.: + Example: pair1: pair2: seqA1: TACTAC seqA2: TAC-TAC @@ -726,8 +705,8 @@ def insertDash(self, dict, pos, keyList): Arguments: dict (dict) contains many objects as: - key = (string) - values = (string) + - key = (string) + - values = (string) pos (int) the char position at wich to insert keyList (list) list of keys of objects to modify Variables: @@ -746,7 +725,7 @@ def slidingWindow(self, heuristicMSA, optimized=True): """ Method that slices all the sequences in a dictionary to a specific window (substring) - ex.: + Example: step_size=3 window_size=5 @@ -755,8 +734,8 @@ def slidingWindow(self, heuristicMSA, optimized=True): Args: alignedSequences (Dictionary) - Key (String) is the ID of the specimen - Data (Seq(String)) is the specimen's DNS sequence + - Key (String) is the ID of the specimen + - Data (Seq(String)) is the specimen's DNA sequence others* (var) see param.yaml Return: @@ -813,10 +792,10 @@ def dictToFile(self, dict, filename, ext): Debuging method that creates files from a dictonnary of sequences. File is put in the debug file of the cwd - arguments + args: dict (dict) the objects to write in the file - key = (string) - values = (string) + - key = (string) + - values = (string) filename (String) the name of the future file ext (String) the file extension @@ -839,10 +818,10 @@ def makeMSA(self, windowed): objects from bioPython. Each entry in the dictionnary is a MSA object of a single sliding window.
- return + returns: msaSet (dict) - key (String) the window name - value (AlignIO) the MSA object + - key (String) - the window name + - value (AlignIO) - the MSA object """ msaSet = {} for windowSet in windowed.keys(): @@ -859,13 +838,15 @@ def fileToDict(filename, ext): Method that reads a fasta file and returns a dictionnary of Seq objects arguments: - filename (String) the name of the file - ext (String) the file extension + filename (String) + the name of the file + ext (String) + the file extension return: dict (dict) - key = (string) - values = (string) + - key = (string) + - values = (string) """ f = open(Path(filename + ext), "r") dict = {} diff --git a/aphylogeo/multiProcessor.py b/aphylogeo/multiProcessor.py index 3683d60e..ed2987e4 100644 --- a/aphylogeo/multiProcessor.py +++ b/aphylogeo/multiProcessor.py @@ -114,7 +114,8 @@ def processingLargeData(self): child processes as the available RAM permits, launching new ones as RAM is freed. Variables: - p (Process): Represents a single child process. + p : Process + Represents a single child process. Returns: list: The multiprocess-friendly list that is updated by each child process. @@ -184,8 +185,8 @@ def buttler(self, memBloc): constantly run other methods forever. In this case, it: - updates de memory capacity and - prints updates on the terminal. + - Updates the memory capacity. + - Prints updates to the terminal It exists so not to bottleneck the main thread. Uses timers to execute it's methods because time.sleep() @@ -211,8 +212,10 @@ def memUpdate(self): This method is ran from the buttler() and updates every second Variables: - memBuffer double %Amount of bytes to substract from the available RAM for safety purposes - mem double Amount of bytes + memBuffer : float + Percentage of bytes to subtract from the available RAM for safety purposes. + mem : float + Amount of bytes. 
""" memBuffer = 0.9 # 90% @@ -312,11 +315,11 @@ def processingSmallData(self): causes some marginal time lost; Only use for methods that take at least a second to run : below that, a for loop is likely much faster - Variables: - p Process Representes a single child process - a None Exists only to permit the for loop + Variables: + p : Process + Represents a single child process. - Return: + Returns: The multiprocess-friendly list, that was updated by each child Errors: diff --git a/aphylogeo/utils.py b/aphylogeo/utils.py index c8808f57..16aa8c28 100644 --- a/aphylogeo/utils.py +++ b/aphylogeo/utils.py @@ -90,11 +90,6 @@ def leastSquare(tree1, tree2): - Leaves must all have a twin in each tree. - A tree must not have duplicate leaves. - Example: - x x \n - ╓╫╖ ╓╫╖ \n - 123 312 - :param tree1: The first tree to compare. :type tree1: distanceTree (from Biopython) :param tree2: The second tree to compare. @@ -124,11 +119,6 @@ def robinsonFoulds(tree1, tree2): - Leaves must all have a twin in each tree. - A tree must not have duplicate leaves - Example: - x x \n - ╓╫╖ ╓╫╖ \n - 123 312 - :param tree1: The first tree to compare :type tree1: distanceTree object from biopython converted to Newick :param tree2: The second tree to compare @@ -157,10 +147,6 @@ def euclideanDist(tree1, tree2): The bipartition bitmasks of the trees must be correct for the current tree structures (by calling :meth:`Tree.encode_bipartitions()` method) - x x \n - ╓╫╖ ╓╫╖ \n - 123 312 - :param tree1: The first tree to compare :type tree1: distanceTree object from biopython converted to DendroPY format Newick :param tree2: The second tree to compare