Skip to content

Commit

Permalink
Formatted some of the documentation to be cleaner
Browse files Browse the repository at this point in the history
  • Loading branch information
SachaPaquette committed Oct 25, 2024
1 parent 8c3e6dd commit 095ea2d
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 119 deletions.
173 changes: 77 additions & 96 deletions aphylogeo/alignement.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,11 +315,10 @@ def alignSequencesWithPairwise(self, centroidKey, centroidSeq):
def muscleAlign(self):
"""Method to perform a multiple DNA sequence alignment using Muscle Algorithm
Return:
-------
(Dict): heuristicMSA
Keys: accession ID
Values: Aligned sequences
Returns (Dict):
heuristicMSA
- Keys: accession ID
- Values: Aligned sequences
"""
try:
if sys.platform == "win32":
Expand All @@ -343,11 +342,10 @@ def muscleAlign(self):
def clustalAlign(self):
"""Method to perform a multiple DNA sequence alignment using ClustalW2 Algorithm
Return:
-------
(Dict): heuristicMSA
Keys: accession ID
Values: Aligned sequences
Returns (Dict):
heuristicMSA
- Keys: accession ID
- Values: Aligned sequences
"""
try:
if sys.platform == "win32":
Expand All @@ -369,11 +367,10 @@ def clustalAlign(self):
def mafftAlign(self):
"""Method to perform a multiple DNA sequence alignment using MAFFT Algorithm
Return:
-------
(Dict): heuristicMSA
Keys: accession ID
Values: Aligned sequences
Returns (Dict):
heuristicMSA
- Keys: accession ID
- Values: Aligned sequences
"""
try:
if sys.platform == "win32":
Expand Down Expand Up @@ -421,16 +418,14 @@ def narrowFitPairwise(self, aligned):
The length of each sequence from the pairwise alignment are set equal by
inserting dash (-) in most appropriate location of a given sequence.
Parameters:
-----------
alignment: dict of nested dict
{accession couple #1 : {Centroid Acc:Centroid Aligned Seq, Non-centroid Acc #1: non-centroid Aligned Seq #1},
... ,
{accession couple #n : {Centroid Acc:Centroid Aligned Seq, Non-centroid Acc #n: non-centroid Aligned Seq #n}}
args:
alignment: dict of nested dict
{accession couple #1 : {Centroid Acc:Centroid Aligned Seq, Non-centroid Acc #1: non-centroid Aligned Seq #1},
... ,
{accession couple #n : {Centroid Acc:Centroid Aligned Seq, Non-centroid Acc #n: non-centroid Aligned Seq #n}}
Return:
-------
A dictionary of all accessions and their fitted aligned sequences.
Returns:
A dictionary of all accessions and their fitted aligned sequences.
"""
seqs = self.getAlignSeqs(aligned)
max_len = max(self.getAlignSeqLens(aligned))
Expand All @@ -444,13 +439,11 @@ def narrowFitPairwise(self, aligned):
def getAlignSeqs(self, aligned):
"""Extract all sequences aligned using a pairwise alignment
Parameters:
-----------
alignment: see fitPairwise(alignment) docstring
args:
alignment: see fitPairwise(alignment) docstring
Return:
-------
List of sequences aligned through pairwise alignment
Returns:
List of sequences aligned through pairwise alignment
"""
seqs = []
for alignment in aligned:
Expand All @@ -460,42 +453,36 @@ def getAlignSeqs(self, aligned):
def getAlignSeqLens(self, aligned):
    """Get the length of every sequence aligned using a pairwise alignment.

    Args:
        aligned: the pairwise alignment structure, passed unchanged to
            ``self.getAlignSeqs`` (see the narrowFitPairwise docstring for
            its layout).

    Returns:
        List of the length of each aligned sequence, in the order
        ``self.getAlignSeqs(aligned)`` yields them.
    """
    return [len(seq) for seq in self.getAlignSeqs(aligned)]

def getAlignCouple(self, aligned):
    """Get the nested accession couples and their respective sequences.

    Args:
        aligned (dict): the pairwise alignment structure; only its values
            are read (see the narrowFitPairwise docstring for its layout).

    Returns:
        List of the values of ``aligned``, in insertion order. Per the
        alignment layout these are presumably the paired accessions mapped
        to their aligned sequences.
    """
    # list() already builds a fresh list; no identity comprehension needed.
    return list(aligned.values())

def extractOneAlignAcc(self, aligned, nest_ord=0):
"""Extract the accession from a nested alignment couple
Parameters:
-----------
alignment: see fitPairwise(alignment) docstring
nest_ord, int, optional:
The position of the nested accessions (Default = 0 (centroid), 1 (aligned sequence))
args:
alignment: see fitPairwise(alignment) docstring
nest_ord (int) optional:
The position of the nested accessions (Default = 0 (centroid), 1 (aligned sequence))
Return:
-------
The list of either centroid (nest_ord = 0 (Default)) or non-centroid (nest_ord = 1)
accessions of a group of sequences aligned throug pairwise alignment.
Returns:
The list of either centroid (nest_ord = 0 (Default)) or non-centroid (nest_ord = 1)
accessions of a group of sequences aligned through pairwise alignment.
"""
try:
return [list(i)[nest_ord] for i in self.getAlignCouple(aligned)]
Expand All @@ -507,15 +494,13 @@ def extractOneAlignAcc(self, aligned, nest_ord=0):
def isCurrentCharDash(self, seqs, seq_i, ch_i):
"""Assess whether the character at current cursor position is a dash
Parameters:
-----------
seqs, list: aligned sequences to fit
seq_i, int: index of the current sequence
ch_i, int: index of the currenct character
args:
seqs (list): aligned sequences to fit
seq_i (int): index of the current sequence
ch_i (int): index of the current character
Return:
-------
True if the current character assessed is a dash, False otherwise
Returns:
True if the current character assessed is a dash, False otherwise
"""
try:
return seqs[seq_i][ch_i] == "-"
Expand All @@ -528,14 +513,12 @@ def insertDashToShorterSeq(self, seqs, ch_i, aligned):
Insert a dash (-) character in a sequence if its length is shorter
than the longest one in the group of aligned sequence.
Parameters:
-----------
seqs, list: aligned sequences to fit
seq_i, int: index of the current sequence
args:
seqs (list): aligned sequences to fit
seq_i (int): index of the current sequence
Return:
-------
List, The fitted sequences of a pairwise alignment
Returns (List):
- The fitted sequences of a pairwise alignment
"""
for seq_j in range(0, len(seqs)):
try:
Expand All @@ -548,14 +531,12 @@ def insertDashToShorterSeq(self, seqs, ch_i, aligned):
def mergeFitPairwise(self, aligned, seqs):
"""Generate a dictionary of all accessions and their fitted sequences
Parameters:
-----------
alignment: see fitPairwise(alignment) docstring
seqs, list: aligned sequences to fit
args:
alignment: see fitPairwise(alignment) docstring
seqs (list): aligned sequences to fit
Return:
-------
Dict, Group of accessions and their fitted sequences from a pairwise alignment
Returns (Dict):
Group of accessions and their fitted sequences from a pairwise alignment
"""
centroid = {list(set(self.extractOneAlignAcc(aligned)))[0]: seqs[0]}
non_centroid = dict(zip(self.extractOneAlignAcc(aligned, 1), seqs[1::2]))
Expand All @@ -564,14 +545,12 @@ def mergeFitPairwise(self, aligned, seqs):
def appendDashToShorterSeqs(self, seqs, max_len):
    """Append dashes to every sequence shorter than ``max_len``.

    Args:
        seqs (list): fitted sequences post pairwise alignment; each item is
            converted with ``str()`` before padding.
        max_len (int): length of the longest aligned sequence, including
            the blank/dash characters.

    Returns:
        List of strings, each right-padded with "-" up to ``max_len``.
        Sequences already at least ``max_len`` long are returned unchanged
        (never truncated).
    """
    # Format spec "-<N": fill with "-", left-align, minimum width N.
    return [f"{str(seq):-<{max_len}}" for seq in seqs]

Expand All @@ -582,7 +561,7 @@ def starAlignement(self, centroidKey, aligned):
"-" is found in the seqA of a pair, but not another, it is inserted
into every other ones.
ex.:
Example:
pair1: pair2:
seqA1: TACTAC seqA2: TAC-TAC
Expand Down Expand Up @@ -726,8 +705,8 @@ def insertDash(self, dict, pos, keyList):
Arguments:
dict (dict) contains many objects as:
key = (string)
values = (string)
- key = (string)
- values = (string)
pos (int) the char position at which to insert
keyList (list) list of keys of objects to modify
Variables:
Expand All @@ -746,7 +725,7 @@ def slidingWindow(self, heuristicMSA, optimized=True):
"""
Method that slices all the sequences in a dictionary to a specific window (substring)
ex.:
Example:
step_size=3
window_size=5
Expand All @@ -755,8 +734,8 @@ def slidingWindow(self, heuristicMSA, optimized=True):
Args:
alignedSequences (Dictionary)
Key (String) is the ID of the specimen
Data (Seq(String)) is the specimen's DNS sequence
- Key (String) is the ID of the specimen
- Data (Seq(String)) is the specimen's DNA sequence
others* (var) see param.yaml
Return:
Expand Down Expand Up @@ -813,10 +792,10 @@ def dictToFile(self, dict, filename, ext):
Debugging method that creates files from a dictionary of sequences.
File is put in the debug file of the cwd
arguments
args:
dict (dict) the objects to write in the file
key = (string)
values = (string)
- key = (string)
- values = (string)
filename (String) the name of the future file
ext (String) the file extension
Expand All @@ -839,10 +818,10 @@ def makeMSA(self, windowed):
objects from bioPython. Each entry in the dictionary is an MSA object
of a single sliding window.
return
returns:
msaSet (dict)
key (String) the window name
value (AlignIO) the MSA object
- key (String) - the window name
- value (AlignIO) - the MSA object
"""
msaSet = {}
for windowSet in windowed.keys():
Expand All @@ -859,13 +838,15 @@ def fileToDict(filename, ext):
Method that reads a fasta file and returns a dictionary of Seq objects
arguments:
filename (String) the name of the file
ext (String) the file extension
filename (String)
the name of the file
ext (String)
the file extension
return:
dict (dict)
key = (string)
values = (string)
- key = (string)
- values = (string)
"""
f = open(Path(filename + ext), "r")
dict = {}
Expand Down
21 changes: 12 additions & 9 deletions aphylogeo/multiProcessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,8 @@ def processingLargeData(self):
child processes as the available RAM permits, launching new ones as RAM is freed.
Variables:
p (Process): Represents a single child process.
p : Process
Represents a single child process.
Returns:
list: The multiprocess-friendly list that is updated by each child process.
Expand Down Expand Up @@ -184,8 +185,8 @@ def buttler(self, memBloc):
constantly run other methods forever.
In this case, it:
updates de memory capacity and
prints updates on the terminal.
- Updates the memory capacity.
- Prints updates to the terminal
It exists so not to bottleneck the main thread.
Uses timers to execute its methods because time.sleep()
Expand All @@ -211,8 +212,10 @@ def memUpdate(self):
This method is run from the buttler() and updates every second
Variables:
memBuffer double %Amount of bytes to substract from the available RAM for safety purposes
mem double Amount of bytes
memBuffer : float
Percentage of bytes to subtract from the available RAM for safety purposes.
mem : float
Amount of bytes.
"""

memBuffer = 0.9 # 90%
Expand Down Expand Up @@ -312,11 +315,11 @@ def processingSmallData(self):
causes some marginal time lost; Only use for methods that take at
least a second to run : below that, a for loop is likely much faster
Variables:
p Process Representes a single child process
a None Exists only to permit the for loop
Variables:
p : Process
Represents a single child process.
Return:
Returns:
The multiprocess-friendly list, that was updated by each child
Errors:
Expand Down
Loading

0 comments on commit 095ea2d

Please sign in to comment.