From 60ee5db995f53e129bd33492dfaa08c6664ae455 Mon Sep 17 00:00:00 2001 From: jyaacoub Date: Sun, 3 Nov 2024 19:47:14 -0500 Subject: [PATCH] feat(mutagenesis): support for custom point mutations --- run_mutagenesis.py | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/run_mutagenesis.py b/run_mutagenesis.py index ca1f4a7..963300c 100644 --- a/run_mutagenesis.py +++ b/run_mutagenesis.py @@ -109,7 +109,43 @@ original_seq = pdb_original.sequence original_pkd = MODEL(pro.to(DEVICE), lig.to(DEVICE)) -print("Original pkd:", original_pkd) +print("Original pkd:", original_pkd, end="\n\n") + +if MUTATIONS: + mut_pdb_file = run_modeller_multiple(PDB_FILE, MUTATIONS) + print(mut_pdb_file) + pro, _ = get_protein_features(mut_pdb_file, MODEL_PARAMS['feature_opt'], MODEL_PARAMS['edge_opt']) + mut_pkd = MODEL(pro.to(DEVICE), lig.to(DEVICE)) + print("\nMutated pkd:", mut_pkd) +else: + logging.warning("No mutations were passed in - running full saturation mutagenesis") + # zero indexed res range to mutate: + res_range = (max(RES_START, 0), min(RES_END, len(original_seq))) + + from src.utils.mutate_model import run_modeller + amino_acids = ResInfo.amino_acids[:-1] # not including "X" - unknown + muta = np.zeros(shape=(len(amino_acids), len(original_seq))) + + with tqdm(range(*res_range), ncols=100, total=(res_range[1]-res_range[0]), + desc='Saturation mutagenesis') as t: + for j in t: + for i, AA in enumerate(amino_acids): + if i%2 == 0: + t.set_postfix(res=j, AA=i+1) + + if original_seq[j] == AA: # skip same AA modifications + muta[i,j] = original_pkd + continue + out_pdb_fp = run_modeller(PDB_FILE, j+1, ResInfo.code_to_pep[AA], "A") + + pro, _ = get_protein_features(out_pdb_fp, MODEL_PARAMS['feature_opt'], MODEL_PARAMS['edge_opt']) + assert pro.pro_seq != original_seq and pro.pro_seq[j] == AA, \ + f"ERROR in modeller, {pro.pro_seq} == {original_seq} \nor {pro.pro_seq[j]} != {AA}" + + muta[i,j] = MODEL(pro.to(DEVICE), lig.to(DEVICE)) + + # delete after use + os.remove(out_pdb_fp) if MUTATIONS: mut_pdb_file = run_modeller_multiple(PDB_FILE, MUTATIONS)