diff --git a/coupon_collector.py b/coupon_collector.py index 1695d62..966ab6c 100644 --- a/coupon_collector.py +++ b/coupon_collector.py @@ -106,7 +106,7 @@ def display_parameters(n_motifs, n_picks, dv, dc, k, n, motifs, symbols, Harr, H print("The Codeword is \n{}\n".format(C)) return -def get_parameters(n_motifs, n_picks, dv, dc, k, n, ffdim, display=True, Harr=None, H=None, G=None): +def get_parameters(n_motifs, n_picks, dv, dc, k, n, ffdim, zero_codeword=False, display=True, Harr=None, H=None, G=None): """ Returns the parameters for the simulation """ # Starting adresses from 1 @@ -114,22 +114,28 @@ def get_parameters(n_motifs, n_picks, dv, dc, k, n, ffdim, display=True, Harr=No symbols = choose_symbols(n_motifs, n_picks) - symbols.pop(-1) - symbols.pop(-2) - symbols.pop(-3) + symbols.pop() + symbols.pop() + symbols.pop() symbol_keys = np.arange(0, ffdim) + #graph = VariableTannerGraph(dv, dc, k, n, ffdim=ffdim) graph = TannerGraph(dv, dc, k, n, ffdim=ffdim) if Harr is None: Harr = r.get_H_arr(dv, dc, k, n) - H = r.get_H_Matrix(dv, dc, k, n, Harr) - G = r.parity_to_generator(H, ffdim=ffdim) + + H = r.get_H_Matrix(dv, dc, k, n, Harr) + #print(H) + if zero_codeword: + G = np.zeros([k,n], dtype=int) + else: + G = r.parity_to_generator(H, ffdim=ffdim) + graph.establish_connections(Harr) - - + if np.any(np.dot(G, H.T) % ffdim != 0): print("Matrices are not valid, aborting simulation") exit() @@ -144,9 +150,6 @@ def get_parameters(n_motifs, n_picks, dv, dc, k, n, ffdim, display=True, Harr=No print("Codeword is not valid, aborting simulation") exit() - if display: - display_parameters(n_motifs, n_picks, dv, dc, k, n, motifs, symbols, Harr, H, G, C, ffdim) - return graph, C, symbols, motifs def get_parameters_sc_ldpc(n_motifs, n_picks, L, M, dv, dc, k, n, ffdim, display=True, Harr=None, H=None, G=None): @@ -157,9 +160,9 @@ def get_parameters_sc_ldpc(n_motifs, n_picks, L, M, dv, dc, k, n, ffdim, display symbols = choose_symbols(n_motifs, n_picks) - symbols.pop(-1) - symbols.pop(-2) - symbols.pop(-3) + symbols.pop() + symbols.pop() + symbols.pop() symbol_keys = np.arange(0, ffdim) @@ -218,7 +221,7 @@ def run_singular_decoding(graph, C, read_length, symbols, motifs, n_picks): print("Decoding unsuccessful") return None -def decoding_errors_fer(k, n, dv, dc, graph, C, symbols, motifs, n_picks, decoding_failures_parameter=10, max_iterations=10000, iterations=50, uncoded=False, bec_decoder=False, label=None, code_class="", read_lengths=np.arange(1,20)): +def decoding_errors_fer(k, n, dv, dc, graph, C, symbols, motifs, n_picks, decoding_failures_parameter=100, max_iterations=100, iterations=50, uncoded=False, masked = False, bec_decoder=False, label=None, code_class="", read_lengths=np.arange(1,20)): """ Returns the frame error rate curve - for same H, same G, same C""" frame_error_rate = [] @@ -228,10 +231,25 @@ def decoding_errors_fer(k, n, dv, dc, graph, C, symbols, motifs, n_picks, decodi for i in tqdm(read_lengths): decoding_failures, iterations, counter = 0, 0, 0 for j in tqdm(range(max_iterations)): - # Assigning values to Variable Nodes after generating erasures in zero array - symbols_read = read_symbols(C, i, symbols, motifs, n_picks) + + #print(C[:10]) + if masked: + mask = [np.random.randint(ffdim) for i in range(n)] + C2 = [(C[i] + mask[i]) % ffdim for i in range(len(C))] + symbols_read = read_symbols(C2, i, symbols, motifs, n_picks) + + # Unmasking + symbols_read = [[(i - mask[j]) % ffdim for i in symbols_read[j]] for j in range(len(symbols_read))] + #print(symbols_read[:10]) + #exit() + 
else: + symbols_read = read_symbols(C, i, symbols, motifs, n_picks) + #print(symbols_read[:10]) + + + if not uncoded: - graph.assign_values(read_symbols(C, i, symbols, motifs, n_picks)) + graph.assign_values(symbols_read) if bec_decoder: decoded_values = graph.coupon_collector_erasure_decoder() else: @@ -240,6 +258,10 @@ def decoding_errors_fer(k, n, dv, dc, graph, C, symbols, motifs, n_picks, decodi decoded_values = symbols_read # Getting the average error rates for iteration runs + #print(C[:10]) + #print(decoded_values[:10]) + + # Would want to fix this ideally if sum([len(i) for i in decoded_values]) == len(decoded_values): if np.all(np.array(decoded_values).T[0] == C): @@ -257,8 +279,8 @@ def decoding_errors_fer(k, n, dv, dc, graph, C, symbols, motifs, n_picks, decodi frame_error_rate.append(error_rate) - plt.plot(read_lengths, frame_error_rate, 'o') - plt.plot(read_lengths, frame_error_rate, label=label) + plt.plot(read_lengths, frame_error_rate, 'o', label=label) + plt.plot(read_lengths, frame_error_rate) plt.title("Frame Error Rate for CC for {}{}-{} {}-{} for 8C4 Symbols".format(code_class, k, n, dv, dc)) plt.ylabel("Frame Error Rate") plt.xlabel("Read Length") @@ -272,7 +294,7 @@ def decoding_errors_fer(k, n, dv, dc, graph, C, symbols, motifs, n_picks, decodi -def run_fer(n_motifs, n_picks, dv, dc, k, n, L, M, ffdim, code_class="", iterations=5, bec_decoder=False, uncoded=False, saved_code=False, singular_decoding=True, fer_errors=True, read_lengths=np.arange(1,20)): +def run_fer(n_motifs, n_picks, dv, dc, k, n, L, M, ffdim, code_class="", iterations=5, bec_decoder=False, uncoded=False, saved_code=False, singular_decoding=False, fer_errors=True, read_lengths=np.arange(1,20), zero_codeword=False, label="", Harr=None, masked=False): if saved_code: Harr, H, G = get_saved_code(dv, dc, k, n, L, M, code_class=code_class) @@ -280,33 +302,40 @@ def run_fer(n_motifs, n_picks, dv, dc, k, n, L, M, ffdim, code_class="", iterati if code_class == "sc_": graph, C, symbols, motifs = get_parameters_sc_ldpc(n_motifs, n_picks, L, M, dv, dc, k, n, ffdim, display=False) else: - graph, C, symbols, motifs = get_parameters(n_motifs, n_picks, dv, dc, k, n, ffdim, display=True) + graph, C, symbols, motifs = get_parameters(n_motifs, n_picks, dv, dc, k, n, ffdim, display=False, zero_codeword=zero_codeword, Harr=Harr) if singular_decoding: run_singular_decoding(graph, C, 8, symbols, motifs, n_picks) - if bec_decoder: + elif bec_decoder: print(decoding_errors_fer(k, n, dv, dc, graph, C, symbols, motifs, n_picks, iterations=iterations, bec_decoder=True, label='Erasure Decoder', code_class=code_class, read_lengths=read_lengths)) - if uncoded: - print(decoding_errors_fer(k, n, dv, dc, graph, C, symbols, motifs, n_picks, iterations=iterations, uncoded=True, label='Uncoded', code_class=code_class, read_lengths=read_lengths)) - - print(decoding_errors_fer(k, n, dv, dc, graph, C, symbols, motifs, n_picks, iterations=iterations, label=f'CC Decoder', code_class=code_class, read_lengths=read_lengths)) + elif uncoded: + print(decoding_errors_fer(k, n, dv, dc, graph, C, symbols, motifs, n_picks, iterations=iterations, uncoded=True, label=f'{label} Uncoded', code_class=code_class, read_lengths=read_lengths)) + + print(decoding_errors_fer(k, n, dv, dc, graph, C, symbols, motifs, n_picks, iterations=iterations, label=f'{label} CC Decoder', code_class=code_class, read_lengths=read_lengths, masked=masked)) plt.grid() plt.legend() - plt.show() + #plt.show() if __name__ == "__main__": with Profile() as prof: n_motifs, n_picks = 
8, 4 dv, dc, ffdim = 3, 9, 67 - k, n = 100, 150 + k, n = 30, 45 L, M = 50, 1002 read_length = 6 - read_lengths = np.arange(4,5) - run_fer(n_motifs, n_picks, dv, dc, k, n, L, M, ffdim, code_class="sc_", saved_code=False, uncoded=False, bec_decoder=False, read_lengths=read_lengths) + read_lengths = np.arange(5,12) + + Harr = r.get_H_arr(dv, dc, k, n) + masked = True + + run_fer(n_motifs, n_picks, dv, dc, k, n, L, M, ffdim, code_class="", saved_code=False, uncoded=True, bec_decoder=False, read_lengths=read_lengths, zero_codeword=True, label="ZeroCW", Harr=Harr, masked=masked) + + run_fer(n_motifs, n_picks, dv, dc, k, n, L, M, ffdim, code_class="", saved_code=False, uncoded=True, bec_decoder=False, read_lengths=read_lengths, zero_codeword=False, label="FullCW", Harr=Harr, masked=masked) + plt.show() ( Stats(prof) .strip_dirs() diff --git a/distracted_coupon_collector.py b/distracted_coupon_collector.py index 6765de1..6063a79 100644 --- a/distracted_coupon_collector.py +++ b/distracted_coupon_collector.py @@ -15,7 +15,7 @@ import re from cProfile import Profile from coupon_collector import get_parameters, get_parameters_sc_ldpc -from tanner_qspa import TannerQSPA +from tanner_qspa import TannerQSPA, get_max_symbol def choose_symbols(n_motifs, picks): """Creates Symbol Array as a combination of Motifs @@ -51,6 +51,7 @@ def distracted_coupon_collector_channel(symbol, R, P, n_motifs): reads.append(random.randint(1, n_motifs)) return reads + def get_parameters(n_motifs, n_picks, dv, dc, k, n, ffdim, zero_codeword=False, display=True, Harr=None, H=None, G=None): """ Returns the parameters for the simulation """ @@ -59,10 +60,10 @@ def get_parameters(n_motifs, n_picks, dv, dc, k, n, ffdim, zero_codeword=False, symbols = choose_symbols(n_motifs, n_picks) - symbols.pop(-1) - symbols.pop(-2) - symbols.pop(-3) - + symbols.pop() + symbols.pop() + symbols.pop() + symbol_keys = np.arange(0, ffdim) #graph = VariableTannerGraph(dv, dc, k, n, ffdim=ffdim) @@ -70,12 +71,15 @@ def get_parameters(n_motifs, n_picks, dv, dc, k, n, ffdim, zero_codeword=False, if Harr is None: Harr = r.get_H_arr(dv, dc, k, n) - H = r.get_H_Matrix(dv, dc, k, n, Harr) - if zero_codeword: - G = np.zeros([k,n], dtype=int) - else: - G = r.parity_to_generator(H, ffdim=ffdim) + + H = r.get_H_Matrix(dv, dc, k, n, Harr) + #print(H) + if zero_codeword: + G = np.zeros([k,n], dtype=int) + else: + G = r.parity_to_generator(H, ffdim=ffdim) + graph.establish_connections(Harr) if np.any(np.dot(G, H.T) % ffdim != 0): @@ -102,9 +106,9 @@ def get_parameters_sc_ldpc(n_motifs, n_picks, L, M, dv, dc, k, n, ffdim, zero_co symbols = choose_symbols(n_motifs, n_picks) - symbols.pop(-1) - symbols.pop(-2) - symbols.pop(-3) + symbols.pop() + symbols.pop() + symbols.pop() symbol_keys = np.arange(0, ffdim) @@ -183,6 +187,7 @@ def simulate_reads(C, symbols, read_length, P, n_motifs, n_picks): reads (list) : [length of Codeword, no. 
of symbols] list of all the reads as likelihoods """ + likelihood_arr = [] for i in C: motif_occurences = np.zeros(n_motifs) @@ -194,34 +199,74 @@ def simulate_reads(C, symbols, read_length, P, n_motifs, n_picks): symbol_likelihoods = get_symbol_likelihood(n_picks, motif_occurences, P) likelihood_arr.append(symbol_likelihoods) + + #motif_occurrences = np.array([[2,0,0,0], [0,1,1,0], [1,1,0,0]]) + #likelihood_arr = np.array([get_symbol_likelihood(n_picks, i, P) for i in motif_occurrences]) + return likelihood_arr -def decoding_errors_fer(k, n, dv, dc, P, H, G, GF, graph, C, symbols, n_motifs, n_picks, decoder=None, decoding_failures_parameter=200, max_iterations=20000, iterations=50, uncoded=False, bec_decoder=False, label=None, code_class="", read_lengths=np.arange(1,20)): + +def unmask_reordering(symbol_likelihood_arr, mask, ffdim): + """Unmasks the Codeword after generating likelihood arrays post channel transmission""" + + unmasked_likelihood_arr = np.zeros((len(mask), ffdim)) + + for j in range(len(mask)): + for i in range(ffdim): + unmasked_likelihood_arr[j,i] = symbol_likelihood_arr[j,(i+mask[j]) % ffdim] + + return unmasked_likelihood_arr + + +def decoding_errors_fer(k, n, dv, dc, ffdim, P, H, G, GF, graph, C, symbols, n_motifs, n_picks, decoder=None, masked=False, decoding_failures_parameter=50, max_iterations=50, iterations=50, uncoded=False, bec_decoder=False, label=None, code_class="", read_lengths=np.arange(1,20)): frame_error_rate = [] max_iterations = max_iterations decoding_failures_parameter = decoding_failures_parameter # But can be adjusted as a parameter + symbol_keys = np.arange(0, ffdim) for i in tqdm(read_lengths): decoding_failures, iterations, counter = 0, 0, 0 for j in tqdm(range(max_iterations)): - symbol_likelihoods_arr = np.array(simulate_reads(C, symbols, i, P, n_motifs, n_picks)) + + input_arr = [random.choice(symbol_keys) for i in range(k)] + #print(G) + C = np.dot(input_arr, G) % ffdim + + if masked: + mask = [np.random.randint(ffdim) for i in range(n)] + C2 = [(C[i] + mask[i]) % ffdim for i in range(len(C))] + symbol_likelihoods_arr = np.array(simulate_reads(C2, symbols, i, P, n_motifs, n_picks)) + #print(mask[:5]) + #print(symbol_likelihoods_arr[:5]) + symbol_likelihoods_arr = unmask_reordering(symbol_likelihoods_arr, mask, ffdim) + #print(symbol_likelihoods_arr[:5]) + #exit() + else: + symbol_likelihoods_arr = np.array(simulate_reads(C, symbols, i, P, n_motifs, n_picks)) #symbol_likelihoods_arr = [[1/67]*67]*n #print(symbol_likelihoods_arr) - if not decoder: - z = graph.qspa_decode(symbol_likelihoods_arr, H, GF) - else: + if uncoded: + z = [get_max_symbol(i) for i in symbol_likelihoods_arr] + elif decoder: z = decoder.decode(symbol_likelihoods_arr, max_iter=20) + else: + z = graph.qspa_decode(symbol_likelihoods_arr, H, GF) + + #exit() #print(C) - print(z) + #print(z) + if np.array_equal(C, z): counter += 1 + #print("Correct") else: + #print("Incorrect") decoding_failures+=1 iterations += 1 @@ -233,8 +278,8 @@ def decoding_errors_fer(k, n, dv, dc, P, H, G, GF, graph, C, symbols, n_motifs, error_rate = (iterations - counter)/iterations frame_error_rate.append(error_rate) - plt.plot(read_lengths, frame_error_rate, 'o') - plt.plot(read_lengths, frame_error_rate, label=label) + plt.plot(read_lengths, frame_error_rate, 'o', label=label) + plt.plot(read_lengths, frame_error_rate) plt.title("Frame Error Rate for DCC for {}{}-{} {}-{} for 8C4 Symbols".format(code_class, k, n, dv, dc)) plt.ylabel("Frame Error Rate") plt.xlabel("Read Length") @@ -244,25 +289,29 @@ 
def decoding_errors_fer(k, n, dv, dc, P, H, G, GF, graph, C, symbols, n_motifs, plt.ylim(0,1) plt.xticks(np.arange(read_lengths[0], read_lengths[-1], 1)) + print(frame_error_rate) + return frame_error_rate -def run_fer(n_motifs, n_picks, dv, dc, k, n, L, M, ffdim, P, code_class="", iterations=10, cc_decoder=False, bec_decoder=False, uncoded=False, read_lengths=np.arange(1,20), max_iter=10, zero_codeword=False, graph_decoding=False, label=None): +def run_fer(n_motifs, n_picks, dv, dc, k, n, L, M, ffdim, P, code_class="", iterations=10, cc_decoder=False, masked=False, bec_decoder=False, uncoded=False, read_lengths=np.arange(1,20), max_iter=10, zero_codeword=False, graph_decoding=False, label=None, Harr=None): if code_class == "sc_": H, G, graph, C, symbols, motifs = get_parameters_sc_ldpc(n_motifs, n_picks, L, M, dv, dc, k, n, ffdim, zero_codeword=zero_codeword, display=False, Harr=None, H=None, G=None) else: - H, G, graph, C, symbols, motifs = get_parameters(n_motifs, n_picks, dv, dc, k, n, ffdim, zero_codeword=zero_codeword, display=False, Harr=None, H=None, G=None) + H, G, graph, C, symbols, motifs = get_parameters(n_motifs, n_picks, dv, dc, k, n, ffdim, zero_codeword=zero_codeword, display=False, Harr=Harr, H=None, G=None) GF = galois.GF(ffdim) GFH = GF(H.astype(int)) # * GF(np.random.choice(GF.elements[1:], siz GFK = GF(G.astype(int)) if graph_decoding: - decoding_errors_fer(k, n, dv, dc, P, GFH, GFK, GF, graph, C, symbols, n_motifs, n_picks, label="Graph QSPA", read_lengths=read_lengths) + decoding_errors_fer(k, n, dv, dc, ffdim, P, GFH, G, GF, graph, C, symbols, n_motifs, n_picks, masked=masked, label=label + " Graph QSPA", read_lengths=read_lengths) + elif uncoded: + decoding_errors_fer(k, n, dv, dc, ffdim, P, GFH, G, GF, graph, C, symbols, n_motifs, n_picks, uncoded=True, masked=masked, label=label + " Uncoded", read_lengths=read_lengths) else: decoder = QSPADecoder(n, n-k, GF, GFH) - decoding_errors_fer(k, n, dv, dc, P, GFH, GFK, GF, graph, C, symbols, n_motifs, n_picks, decoder=decoder, label="Matrice QSPA", read_lengths=read_lengths) + decoding_errors_fer(k, n, dv, dc, ffdim, P, GFH, G, GF, graph, C, symbols, n_motifs, n_picks, decoder=decoder, masked=masked, label= label + " Matrice QSPA", read_lengths=read_lengths) plt.legend() plt.grid() @@ -272,16 +321,22 @@ def run_fer(n_motifs, n_picks, dv, dc, k, n, L, M, ffdim, P, code_class="", iter if __name__ == "__main__": with Profile() as prof: n_motifs, n_picks = 8, 4 - dv, dc, ffdim, P = 3, 9, 67, 2 * 0.038860387943791645 - k, n = 22, 33 + dv, dc, ffdim, P = 3, 9, 67, 0.08 + k, n = 30, 45 L, M = 12, 51 - read_length = 6 - read_lengths = np.arange(9, 10) + read_lengths = np.arange(6, 15) #run_fer(n_motifs, n_picks, dv, dc, k, n, L, M, ffdim, P, code_class="", uncoded=False, zero_codeword=False, bec_decoder=False, graph_decoding=False, read_lengths=read_lengths) - run_fer(n_motifs, n_picks, dv, dc, k, n, L, M, ffdim, P, code_class="", uncoded=False, zero_codeword=False, bec_decoder=False, graph_decoding=True, read_lengths=read_lengths) - run_fer(n_motifs, n_picks, dv, dc, k, n, L, M, ffdim, P, code_class="", uncoded=False, zero_codeword=True, bec_decoder=False, graph_decoding=True, read_lengths=read_lengths) + + Harr = r.get_H_arr(dv, dc, k, n) + + run_fer(n_motifs, n_picks, dv, dc, k, n, L, M, ffdim, P, code_class="", uncoded=False, zero_codeword=True, masked=False, bec_decoder=False, graph_decoding=True, read_lengths=read_lengths, label="Zero", Harr=Harr) + + run_fer(n_motifs, n_picks, dv, dc, k, n, L, M, ffdim, P, code_class="", 
uncoded=False, zero_codeword=False, masked=False, bec_decoder=False, graph_decoding=True, read_lengths=read_lengths, label="FullCW", Harr=Harr) + + + plt.show() ( Stats(prof) diff --git a/tanner_qspa.py b/tanner_qspa.py index bbd28ec..64cc9bb 100644 --- a/tanner_qspa.py +++ b/tanner_qspa.py @@ -5,6 +5,8 @@ def get_max_symbol(prob_arr): max_val = np.max(prob_arr) + + # Numerical issues ? Strict Equality? max_indices = [i for i, val in enumerate(prob_arr) if val == max_val] #print(prob_arr) #print(max_indices) @@ -32,7 +34,7 @@ def initialize_cn_links(self): for vn_index in cn.links: self.cn_links[(cn_index, vn_index)] = np.zeros(67) - def qspa_decode(self, symbol_likelihood_arr, H, GF, max_iterations=20): + def qspa_decode(self, symbol_likelihood_arr, H, GF, max_iterations=10): """Decodes using QSPA """ self.GF = GF @@ -67,25 +69,26 @@ def qspa_decode(self, symbol_likelihood_arr, H, GF, max_iterations=20): #print(sum(random.choice(list(self.cn_links.items()))[1])) parity = not np.matmul(H, max_prob_codeword).any() + + if parity: - print("Decoding converges") + #print("Decoding converges") return max_prob_codeword + self.vn_update(symbol_likelihood_arr) #print(sum(random.choice(list(self.vn_links.items()))[1])) - - if iterations > max_iterations: + + if np.array_equal(max_prob_codeword, prev_max_prob_codeword) or iterations > max_iterations: break - #if np.array_equal(max_prob_codeword, prev_max_prob_codeword) or iterations > max_iterations: - # break prev_max_prob_codeword = max_prob_codeword iterations+=1 #print(f"Iteration {iterations}") - print("Decoding does not converge") + #print("Decoding does not converge") return max_prob_codeword def get_max_prob_codeword(self, P, GF):
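
Notes on the patch, with small self-contained sketches. All code below is illustrative toy code, not part of the repository.

The zero_codeword path replaces G with an all-zero matrix, so the encoded word C is the all-zero codeword. For a linear code the zero word is always a valid codeword, the G.H^T check in get_parameters passes trivially, and parity_to_generator does not need to be computed. A minimal sketch of that shortcut, with toy dimensions and a placeholder parity-check matrix:

import numpy as np

ffdim = 67      # field size used throughout the patch
k, n = 4, 6     # toy dimensions, for illustration only

G = np.zeros((k, n), dtype=int)                    # zero_codeword=True branch
H = np.random.randint(0, ffdim, size=(n - k, n))   # placeholder parity-check matrix

input_arr = np.random.randint(0, ffdim, size=k)
C = np.dot(input_arr, G) % ffdim                   # all-zero codeword

assert np.all(np.dot(G, H.T) % ffdim == 0)         # validity check from get_parameters
assert np.all(np.dot(H, C) % ffdim == 0)           # zero word satisfies every parity check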
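
In coupon_collector.decoding_errors_fer, the new masked branch adds a fresh random offset to every codeword symbol mod ffdim before read_symbols is called, then shifts every candidate symbol in every read back by the same offset, so the decoder still sees statistics for the original codeword. (Note the branch uses ffdim, which is only bound at module level inside the __main__ block; the sketch below makes it an explicit variable.) fake_read_symbols is a hypothetical stand-in for the repo's read_symbols, not the real channel model:

import numpy as np

ffdim, n = 67, 10

def fake_read_symbols(codeword):
    # Stand-in for read_symbols(): per position, a small candidate set
    # that always contains the transmitted symbol.
    return [{c, (c + 1) % ffdim} for c in codeword]

C = np.random.randint(0, ffdim, size=n)

# Mask: add a random offset per position before "transmission".
mask = np.random.randint(0, ffdim, size=n)
C_masked = (C + mask) % ffdim

reads = fake_read_symbols(C_masked)

# Unmask: shift every candidate in every read back by the same offset.
reads_unmasked = [{(s - mask[j]) % ffdim for s in read} for j, read in enumerate(reads)]

# The original (unmasked) symbol is recovered as a candidate at each position.
assert all(C[j] in reads_unmasked[j] for j in range(n))

Combined with zero_codeword=True, this makes the word seen by the channel random on every iteration even though the underlying codeword is all zeros.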
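
unmask_reordering in distracted_coupon_collector.py performs the same unmasking at the likelihood level: the channel's probability for the masked symbol (i + mask[j]) mod ffdim is the probability of the unmasked symbol i, so each length-ffdim likelihood vector is cyclically re-indexed. A small property check of that re-indexing, using random vectors in place of simulate_reads output:

import numpy as np

def unmask_reordering(symbol_likelihood_arr, mask, ffdim):
    """Cyclically re-index each per-symbol likelihood vector to undo the mask."""
    unmasked = np.zeros((len(mask), ffdim))
    for j in range(len(mask)):
        for i in range(ffdim):
            unmasked[j, i] = symbol_likelihood_arr[j, (i + mask[j]) % ffdim]
    return unmasked

ffdim, n = 67, 5
rng = np.random.default_rng(0)

# Random likelihood vectors standing in for the output of simulate_reads().
likelihoods = rng.random((n, ffdim))
likelihoods /= likelihoods.sum(axis=1, keepdims=True)

mask = rng.integers(0, ffdim, size=n)
unmasked = unmask_reordering(likelihoods, mask, ffdim)

# The most likely unmasked symbol is the most likely masked symbol minus the mask.
for j in range(n):
    assert np.argmax(unmasked[j]) == (np.argmax(likelihoods[j]) - mask[j]) % ffdim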
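
The new comment in tanner_qspa.get_max_symbol ("Numerical issues ? Strict Equality?") questions exact float comparison when collecting the maximum-probability symbols. One possible tolerance-based variant, hypothetical and not the repository's implementation, groups near-ties with np.isclose:

import numpy as np

def get_max_symbol_tolerant(prob_arr, rtol=1e-9):
    """Hypothetical variant of get_max_symbol: values within a relative
    tolerance of the maximum count as ties instead of requiring exact equality."""
    prob_arr = np.asarray(prob_arr, dtype=float)
    max_val = prob_arr.max()
    max_indices = np.flatnonzero(np.isclose(prob_arr, max_val, rtol=rtol))
    # Break remaining ties at random (one possible policy).
    return int(np.random.choice(max_indices))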