Merge pull request #55 from jyaacoub/development

Development
jyaacoub · Nov 3, 2023 · a0a2284 · a0a2284
2 parents a17a5e7 + cc9ae9e
commit a0a2284
Show file tree

Hide file tree

Showing 29 changed files with 20,591 additions and 207 deletions.
diff --git a/.gitignore b/.gitignore
@@ -207,6 +207,7 @@ lib/mgltools_x86_64Linux2_1.5.7/MGLToolsPckgs/AutoDockTools/Utilities24/*
 lib/mgltools_x86_64Linux2_1.5.7p1.tar.gz
 
 log_test/
+slurm_tests/
 slurm_out_DDP/
 /*.sh
 results/model_checkpoints/ours/*.model*
diff --git a/playground.py b/playground.py
@@ -1,31 +1,19 @@
-#%%
-from src.data_processing.datasets import PDBbindDataset
-from src.utils import config as cfg
-import pandas as pd
-import matplotlib.pyplot as plt
+# %%
+from src.data_analysis.figures import prepare_df, fig3_edge_feat
+from src.utils import config
 
-# d0 = pd.read_csv(f'{cfg.DATA_ROOT}/DavisKibaDataset/davis/nomsa_anm/full/XY.csv', index_col=0)
-d0 = pd.read_csv(f'{cfg.DATA_ROOT}/PDBbindDataset/nomsa_anm/full/XY.csv', index_col=0)
+from transformers import AutoTokenizer, AutoModel
 
-d0['len'] = d0.prot_seq.str.len()
 
-# %%
-n, bins, patches = plt.hist(d0['len'], bins=20)
-# Set labels and title
-plt.xlabel('Protein Sequence length')
-plt.ylabel('Frequency')
-plt.title('Histogram of Protein Sequence length (davis)')
+df = prepare_df('results/model_media/model_stats.csv')
 
-# Add counts to each bin
-for count, x, patch in zip(n, bins, patches):
-    plt.text(x + 0.5, count, str(int(count)), ha='center', va='bottom')
+# %%
+fig3_edge_feat(df, show=True, exclude=[])
 
-cutoff= 1500
-print(f"Eliminating codes above {cutoff} length would reduce the dataset by: {len(d0[d0['len'] > cutoff])}")
-print(f"\t - Eliminates {len(d0[d0['len'] > cutoff].index.unique())} unique proteins")
+# %%
+print('test')
 
-# %% -d PDBbind -f nomsa -e anm
-from src.utils.loader import Loader
-d1 = Loader.load_dataset('PDBbind', 'nomsa', 'anm')
+#### ChemGPT ####
 
-# %%
+tokenizer = AutoTokenizer.from_pretrained("ncfrey/ChemGPT-4.7M")
+model = AutoModel.from_pretrained("ncfrey/ChemGPT-4.7M")
diff --git a/...d/EDIM_PDBbindD_nomsaF_anmE_64B_0.0001LR_0.4D_2000E_originalLF_binaryLE_his.png b/...d/EDIM_PDBbindD_nomsaF_anmE_64B_0.0001LR_0.4D_2000E_originalLF_binaryLE_his.png
diff --git a/.../EDIM_PDBbindD_nomsaF_anmE_64B_0.0001LR_0.4D_2000E_originalLF_binaryLE_loss.png b/.../EDIM_PDBbindD_nomsaF_anmE_64B_0.0001LR_0.4D_2000E_originalLF_binaryLE_loss.png
diff --git a/...IM_PDBbindD_nomsaF_anmE_64B_0.0001LR_0.4D_2000E_originalLF_binaryLE_scatter.png b/...IM_PDBbindD_nomsaF_anmE_64B_0.0001LR_0.4D_2000E_originalLF_binaryLE_scatter.png