Skip to content

Commit

Permalink
Merge pull request #51 from jyaacoub/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
jyaacoub authored Oct 31, 2023
2 parents 07c6d05 + 0ab364e commit a17a5e7
Show file tree
Hide file tree
Showing 26 changed files with 4,467 additions and 171 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -207,5 +207,6 @@ lib/mgltools_x86_64Linux2_1.5.7/MGLToolsPckgs/AutoDockTools/Utilities24/*
lib/mgltools_x86_64Linux2_1.5.7p1.tar.gz

log_test/
slurm_tests/
slurm_out_DDP/
/*.sh
results/model_checkpoints/ours/*.model*
122 changes: 122 additions & 0 deletions docs/req.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
absl-py==1.4.0
aiosignal==1.3.1
astor==0.8.1
asttokens==2.2.1
attrs==23.1.0
autograd==1.5
autograd-gamma==0.5.0
backcall==0.2.0
biopython==1.79
certifi==2023.5.7
charset-normalizer==3.1.0
click==8.1.7
cloudpickle==2.2.1
cmake==3.26.4
comm==0.1.3
contourpy==1.0.7
cycler==0.11.0
debugpy==1.6.7
decorator==5.1.1
dm-tree==0.1.8
executing==1.2.0
fastjsonschema==2.17.1
filelock==3.12.2
flake8==6.1.0
fonttools==4.39.4
formulaic==0.6.1
frozenlist==1.4.0
fsspec==2023.6.0
future==0.18.3
grpcio==1.57.0
huggingface-hub==0.16.4
idna==3.4
interface-meta==1.3.0
ipykernel==6.23.1
ipython==8.13.2
jedi==0.18.2
Jinja2==3.1.2
joblib==1.2.0
jsonschema==4.17.3
jupyter_client==8.2.0
jupyter_core==5.3.0
kiwisolver==1.4.4
lifelines==0.27.7
lit==16.0.5
MarkupSafe==2.1.3
matplotlib==3.7.1
matplotlib-inline==0.1.6
mccabe==0.7.0
mpmath==1.3.0
msgpack==1.0.5
nbformat==5.9.0
nest-asyncio==1.5.6
networkx==3.1
numpy==1.23.5
nvidia-cublas-cu11==11.10.3.66
nvidia-cuda-cupti-cu11==11.7.101
nvidia-cuda-nvrtc-cu11==11.7.99
nvidia-cuda-runtime-cu11==11.7.99
nvidia-cudnn-cu11==8.5.0.96
nvidia-cufft-cu11==10.9.0.58
nvidia-curand-cu11==10.2.10.91
nvidia-cusolver-cu11==11.4.0.1
nvidia-cusparse-cu11==11.7.4.91
nvidia-nccl-cu11==2.14.3
nvidia-nvtx-cu11==11.7.91
packaging==23.1
pandas==1.5.3
parso==0.8.3
pexpect==4.8.0
pickleshare==0.7.5
Pillow==9.5.0
platformdirs==3.5.1
plotly==5.14.1
ProDy==2.4.1
prompt-toolkit==3.0.38
protobuf==4.24.1
psutil==5.9.5
ptyprocess==0.7.0
pure-eval==0.2.2
pyarrow==12.0.1
pycodestyle==2.11.0
pyflakes==3.1.0
Pygments==2.15.1
pyparsing==3.0.9
pyrsistent==0.19.3
python-dateutil==2.8.2
pytz==2023.3
PyYAML==6.0.1
pyzmq==25.1.0
rapidfuzz==3.3.0
ray==2.6.3
rdkit==2023.3.1
regex==2023.6.3
requests==2.31.0
safetensors==0.3.1
scikit-learn==1.2.2
scipy==1.10.1
seaborn==0.11.2
six==1.16.0
stack-data==0.6.2
statannotations==0.6.0
submitit==1.4.5
sympy==1.12
tabulate==0.9.0
tenacity==8.2.2
tensorboardX==2.6.2.2
thefuzz==0.20.0
threadpoolctl==3.1.0
tokenizers==0.13.3
torch==2.0.1
torch-geometric==2.3.1
torchsummary==1.5.1
tornado==6.3.2
tqdm==4.65.0
traitlets==5.9.0
transformers==4.31.0
triton==2.0.0
typing_extensions==4.6.3
tzdata==2023.3
urllib3==2.0.2
wcwidth==0.2.6
wrapt==1.15.0
38 changes: 19 additions & 19 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,29 +1,29 @@
numpy
pandas
tqdm
rdkit
scipy
numpy==1.23.5
pandas==1.5.3
tqdm==4.65.0
rdkit==2023.3.1
scipy==1.10.1

# for generating figures:
matplotlib
seaborn
statannotations
matplotlib==3.7.1
seaborn==0.11.2
statannotations==0.6.0

lifelines # used for concordance index calc
lifelines==0.27.7 # used for concordance index calc
#biopython # used for cmap

# model building
torch
torch_geometric
transformers # huggingface needed for esm
torch==2.0.1
torch-geometric==2.3.1
transformers==4.31.0 # huggingface needed for esm

# optional:
torchsummary
tabulate # for torch_geometric.nn.summary
ipykernel
plotly
requests
torchsummary==1.5.1
tabulate==0.9.0 # for torch_geometric.nn.summary
ipykernel==6.23.1
plotly==5.14.1
requests==2.31.0
#ray[tune]

submitit
ProDy
submitit==1.4.5
ProDy==2.4.1
31 changes: 27 additions & 4 deletions playground.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,31 @@
# %%
from src.data_analysis.figures import prepare_df, fig3_edge_feat
df = prepare_df('results/model_media/model_stats.csv')
#%%
from src.data_processing.datasets import PDBbindDataset
from src.utils import config as cfg
import pandas as pd
import matplotlib.pyplot as plt

# Read the PDBbind XY table; the first CSV column becomes the index.
# (To inspect davis instead, point the path at
#  f'{cfg.DATA_ROOT}/DavisKibaDataset/davis/nomsa_anm/full/XY.csv'.)
d0 = pd.read_csv(
    f'{cfg.DATA_ROOT}/PDBbindDataset/nomsa_anm/full/XY.csv',
    index_col=0,
)

# Character count of each row's protein sequence string.
d0['len'] = d0['prot_seq'].str.len()

# %%
fig3_edge_feat(df, show=True, exclude=[])
# Plot the distribution of protein sequence lengths in 20 bins.
n, bins, patches = plt.hist(d0['len'], bins=20)
# Set labels and title
plt.xlabel('Protein Sequence length')
plt.ylabel('Frequency')
# d0 is read from the PDBbindDataset XY.csv, so label the plot accordingly.
plt.title('Histogram of Protein Sequence length (PDBbind)')

# Add counts to each bin. `bins` holds one more edge than there are counts,
# so average adjacent edges to place each label at the centre of its bar.
bin_centers = (bins[:-1] + bins[1:]) / 2
for count, center in zip(n, bin_centers):
    plt.text(center, count, str(int(count)), ha='center', va='bottom')

cutoff = 1500
# Filter once and reuse the result for both summary lines.
too_long = d0[d0['len'] > cutoff]
print(f"Eliminating codes above {cutoff} length would reduce the dataset by: {len(too_long)}")
# NOTE(review): this counts unique index values (first CSV column) -- confirm
# that the index actually identifies proteins rather than complexes/codes.
print(f"\t - Eliminates {len(too_long.index.unique())} unique proteins")

# %% -d PDBbind -f nomsa -e anm
# Load a dataset through the project's Loader helper; the three arguments
# match the -d / -f / -e values noted in this cell's header.
from src.utils.loader import Loader
d1 = Loader.load_dataset('PDBbind', 'nomsa', 'anm')

# %%
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit a17a5e7

Please sign in to comment.