Skip to content

Commit

Permalink
Merge pull request #55 from jyaacoub/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
jyaacoub authored Nov 3, 2023
2 parents a17a5e7 + cc9ae9e commit a0a2284
Show file tree
Hide file tree
Showing 29 changed files with 20,591 additions and 207 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ lib/mgltools_x86_64Linux2_1.5.7/MGLToolsPckgs/AutoDockTools/Utilities24/*
lib/mgltools_x86_64Linux2_1.5.7p1.tar.gz

log_test/
slurm_tests/
slurm_out_DDP/
/*.sh
results/model_checkpoints/ours/*.model*
36 changes: 12 additions & 24 deletions playground.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,19 @@
#%%
from src.data_processing.datasets import PDBbindDataset
from src.utils import config as cfg
import pandas as pd
import matplotlib.pyplot as plt
# %%
from src.data_analysis.figures import prepare_df, fig3_edge_feat
from src.utils import config

# d0 = pd.read_csv(f'{cfg.DATA_ROOT}/DavisKibaDataset/davis/nomsa_anm/full/XY.csv', index_col=0)
d0 = pd.read_csv(f'{cfg.DATA_ROOT}/PDBbindDataset/nomsa_anm/full/XY.csv', index_col=0)
from transformers import AutoTokenizer, AutoModel

d0['len'] = d0.prot_seq.str.len()

# %%
n, bins, patches = plt.hist(d0['len'], bins=20)
# Set labels and title
plt.xlabel('Protein Sequence length')
plt.ylabel('Frequency')
plt.title('Histogram of Protein Sequence length (davis)')
df = prepare_df('results/model_media/model_stats.csv')

# Add counts to each bin
for count, x, patch in zip(n, bins, patches):
plt.text(x + 0.5, count, str(int(count)), ha='center', va='bottom')
# %%
fig3_edge_feat(df, show=True, exclude=[])

cutoff= 1500
print(f"Eliminating codes above {cutoff} length would reduce the dataset by: {len(d0[d0['len'] > cutoff])}")
print(f"\t - Eliminates {len(d0[d0['len'] > cutoff].index.unique())} unique proteins")
# %%
print('test')

# %% -d PDBbind -f nomsa -e anm
from src.utils.loader import Loader
d1 = Loader.load_dataset('PDBbind', 'nomsa', 'anm')
#### ChemGPT ####

# %%
tokenizer = AutoTokenizer.from_pretrained("ncfrey/ChemGPT-4.7M")
model = AutoModel.from_pretrained("ncfrey/ChemGPT-4.7M")
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit a0a2284

Please sign in to comment.