From 0424b7655342550ab64a75925ba8c9bbeec570fd Mon Sep 17 00:00:00 2001
From: shivani-madugula <madugulashivani@gmail.com>
Date: Thu, 31 Jan 2019 11:53:51 +0530
Subject: [PATCH] Add files via upload

---
 Edit_opt/seq_lev_count.py | 102 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 102 insertions(+)
 create mode 100644 Edit_opt/seq_lev_count.py

diff --git a/Edit_opt/seq_lev_count.py b/Edit_opt/seq_lev_count.py
new file mode 100644
index 0000000..bfb3e7c
--- /dev/null
+++ b/Edit_opt/seq_lev_count.py
@@ -0,0 +1,102 @@
+from __future__ import print_function
+from random import sample, choice
+from Levenshtein import distance
+from Bio import SeqIO
+import sys
+import string
+import datetime as d
+
+
+#@jit
+def all_edit():
+    
+    length_cutoff = int(sys.argv[1])
+    edit_dis = int(sys.argv[2])
+    seq_file = sys.argv[3]
+    filename = sys.argv[4]
+
+    repeat_file = open("all_repeats_1-6nt.txt","r")
+
+    records = SeqIO.parse(seq_file,"fasta")
+
+
+    repeats_out = dict()
+    new_motifs = set()
+    out_file = open(filename,"w+")
+    
+    motif_lengths = []
+
+    for line in repeat_file:
+        motif_dict = dict()
+        
+        L = line.strip().split('\t')
+        motif = L[0]
+        motif_length = int(L[2])
+
+        if motif_length not in motif_lengths:
+            motif_lengths.append(motif_length)
+        i=0
+        input_seq_length = motif_length
+        while i < motif_length and input_seq_length < length_cutoff:
+            motif += motif[i]
+            i += 1
+            if i >= motif_length:
+                i = 0
+            input_seq_length = len(motif)
+        nu_count = dict()
+        nu_count['A'] = motif.count('A')
+        nu_count['T'] = motif.count('T')
+        nu_count['G'] = motif.count('G')
+        nu_count['C'] = motif.count('C')
+        motif_dict['class'] = L[1]
+        motif_dict['motif_length'] = motif_length
+        motif_dict['strand'] = L[3]
+        motif_dict['ncount'] = nu_count
+        repeats_out[motif] = motif_dict
+        repeats_out['rep_lengths'] = [length_cutoff]
+        new_motifs.add(motif) #consists of all the motifs of specified length
+        #print(repeats_out[motif]['ncount']['A'])
+        #break
+
+    """
+    accessing the newly formed motifs 
+    """
+
+    #alphabet = ['A','T','G','C']
+    #i=-1
+    
+    repeat_lengths = repeats_out['rep_lengths'] # All possible length cutoffs
+    
+    for record in records:
+
+        input_seq = str(record.seq).upper()
+        print(input_seq)
+
+        input_seq_length = len(input_seq)
+        for length_cutoff in repeat_lengths:
+            #fallback = length_cutoff - 1
+            sub_start = 0  # substring start
+            sub_stop = sub_start + repeat_lengths[-1]  # substring stop
+            while sub_stop <= input_seq_length:
+            
+                subseq = input_seq[sub_start:sub_stop]
+
+                if subseq not in new_motifs:
+                    
+                    for ext_rep in new_motifs:
+                        #i = i+1
+                        cal_val = (abs(subseq.count('A') - repeats_out[ext_rep]['ncount']['A']) + abs(subseq.count('T') - repeats_out[ext_rep]['ncount']['T']) + abs(subseq.count('G') - repeats_out[ext_rep]['ncount']['G'])+ abs(subseq.count('C') - repeats_out[ext_rep]['ncount']['C']))/edit_dis
+                        if cal_val <= edit_dis:
+                            cal_edit_dis = distance(subseq, ext_rep)
+                            if(cal_edit_dis <= edit_dis):
+                                print('{:<20s} {:<20s} {:<20s} {:<20s} {:<20s} {:<10s} {:<10s}'.format(record.id,str(sub_start),str(sub_stop),subseq,ext_rep,repeats_out[ext_rep]['class'],str(cal_edit_dis)),file = out_file)
+                                #print(record.id,str(sub_start),str(sub_stop),subseq,ext_rep,repeats_out[ext_rep]['class'],str(cal_edit_dis),sep='\t',file = out_file)
+
+            
+                sub_start += 1
+                sub_stop = sub_start + length_cutoff
+                
+
+st = d.datetime.now()    
+all_edit()
+print(d.datetime.now()-st)