Skip to content

Commit

Permalink
fixed operon handling
Browse files Browse the repository at this point in the history
  • Loading branch information
hannahn281 committed May 1, 2023
1 parent 127359b commit f4e42a0
Showing 1 changed file with 30 additions and 20 deletions.
50 changes: 30 additions & 20 deletions class_orthologs.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,15 @@ def get_nuc_rec(self):


def get_nuc_seq(self):

# if no record then we cant find seq
if self.nuc_rec is None:
print(self.nuc_rec)
print("\tRecord is not valid")
return None

print("Get nucleotide sequence for " + str(self.nuc_rec['acc']) + "...")

# grabs upstream region some parameter base pairs away from start to check for operons
if self.nuc_rec['strand'] == '+':
s_start = int(self.nuc_rec['start']) - self.blast_param["base_pairs_to_grab"]
Expand Down Expand Up @@ -138,8 +141,6 @@ def get_nuc_seq(self):

sequence = genome_record.seq

#print(genome_record.features)

# setting up return id
return_id = "p_" + str(self.nuc_rec['acc'])

Expand All @@ -165,23 +166,23 @@ def get_nuc_seq(self):
genes.append(gb_features[i])

upstream_regions = []
if self.nuc_rec['strand'] == '+':
# starts at end of list of features since we are going upstream
gene = len(genes) - 1
# LATER make sure to implement a way to add 10,000 more when you dont reach the end
while gene > 0:
print("here")
if len(upstream_regions) == 0:
upstream_regions.append((genes[gene - 1].location.end, genes[gene].location.start))
elif genes[gene].location.strand == genes[gene - 1].location.strand:
if abs(genes[gene].location.start - genes[gene - 1].location.end) <= self.blast_param["min_intergenic_distance"]:
if abs(genes[gene].location.start - genes[gene - 1].location.end) > 0:
upstream_regions.append((genes[gene - 1].location.end, genes[gene].location.start))
else:
break
# starts at end of list of features since we are going upstream
gene = len(genes) - 1
# LATER make sure to implement a way to add 10,000 more when you dont reach the end
while gene > 0:
print("here")
if len(upstream_regions) == 0:
upstream_regions.append((genes[gene - 1].location.end, genes[gene].location.start))
elif genes[gene].location.strand == genes[gene - 1].location.strand:
if abs(genes[gene].location.start - genes[gene - 1].location.end) <= self.blast_param["min_intergenic_distance"]:
if abs(genes[gene].location.start - genes[gene - 1].location.end) > 0:
upstream_regions.append((genes[gene - 1].location.end, genes[gene].location.start))
else:
break
gene -= 1
else:
break
gene -= 1
'''
else:
# starts at beginning of list of features since we are going upstream on reverse strand
gene = 0
Expand All @@ -202,17 +203,26 @@ def get_nuc_seq(self):
else:
break
gene += 1
'''

print("Upstream Regions to Take: ")
print(upstream_regions)

# get sequences of each upstream region
upstream_sequences = []
for i in range(len(upstream_regions)):
upstream_sequence = sequence[upstream_regions[i][0]:upstream_regions[i][1]]
upstream_sequences.append(SeqRecord(Seq.Seq(upstream_sequence), id=return_id, description=return_description))
upstream_sequence = sequence
if sequence[upstream_regions[i][0]:upstream_regions[i][1]] != "":
upstream_sequence = sequence[upstream_regions[i][0]:upstream_regions[i][1]]
upstream_sequences.append(SeqRecord(Seq.Seq(upstream_sequence), id=return_id, description=return_description))
else:
upstream_sequence = sequence[upstream_regions[i][1]:upstream_regions[i][0]]
upstream_sequences.append(SeqRecord(Seq.Seq(upstream_sequence), id=return_id, description=return_description))

print(upstream_sequence)

self.nuc_seq = upstream_sequence

print(upstream_sequences[0].seq.complement().reverse_complement())
return upstream_sequence

def percent_similarity(self, ortholog2):
Expand Down

0 comments on commit f4e42a0

Please sign in to comment.