diff --git a/.gitignore b/.gitignore index 997a189..62a1624 100644 --- a/.gitignore +++ b/.gitignore @@ -109,4 +109,6 @@ ENV/ # data for tests prolint2/data/.* .DS_Store -prolint2/.DS_Store \ No newline at end of file +prolint2/.DS_Store +prolint2/data/contacts.csv +prolint2/data/contacts_metrics.csv diff --git a/bin/prolint2 b/bin/prolint2 index 62484ca..161fc5e 100755 --- a/bin/prolint2 +++ b/bin/prolint2 @@ -6,12 +6,14 @@ r"""Argument parser to use prolint2 from the command-line :Copyright: MIT License """ +import os import argparse -import prolint2._version as vers -from prolint2.server.server import start_server import configparser + from prolint2 import get_config -import os +import prolint2._version as vers +from prolint2.server.server import ProLintDashboard + # Getting the config file config = configparser.ConfigParser(allow_no_value=True) @@ -118,7 +120,9 @@ prolint2_parser.add_argument( args = prolint2_parser.parse_args() # Starting the server -start_server(payload=args, reloader=False, i_bool=args.i_bool, e_file=args.e_file) +# start_server(payload=args, reloader=False, i_bool=args.i_bool, e_file=args.e_file) +app = ProLintDashboard() +app.start_server(payload=args) import sys sys.exit() diff --git a/docs/notebooks/01_The_ProLint_Universe.ipynb b/docs/notebooks/01_The_ProLint_Universe.ipynb new file mode 100644 index 0000000..04b546d --- /dev/null +++ b/docs/notebooks/01_The_ProLint_Universe.ipynb @@ -0,0 +1,415 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Working with the ProLint `Universe` object" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "from prolint2 import Universe\n", + "from MDAnalysis import Universe as MDUniverse\n", + "from prolint2.sampledata import GIRK" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ProLint can read data directly from another `Universe` object. 
If you already have a pipeline that uses MDAnalysis to read the data, you can easily switch to ProLint by replacing the `Universe` object with a `ProLint.Universe` object or by directly reading the data from the MDAnalysis `Universe`." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Below we create an MDAnalysis `Universe` instance and define a custom `query` and `database`." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# Use MDAnalysis to create a Universe instance\n", + "mda_u = MDUniverse(GIRK.coordinates, GIRK.trajectory)\n", + "mda_u_query = mda_u.select_atoms('protein and name BB')\n", + "mda_u_db = mda_u.select_atoms('resname POPE')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can now use the MDAnalysis Universe instance to create a ProLint Universe instance:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# Use `mda_u` to create a ProLint Universe instance\n", + "u = Universe(universe=mda_u)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also directly use the `query` and `database` information to create a ProLint Universe instance:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO: This does not work, because of issues with our `macro` definitions\n", + "# NOTE: We need to make a decision on what to do with `macro`'s in the future, since I do not think we are currently using them anywhere\n", + "# u = Universe(universe=mda_u, query=mda_u_query, db=mda_u_db)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Accessing the query and database AtomGroups" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + 
"(>,\n", + " >)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "u.query, u.database" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice how both the `query` and `database` AtomGroups are ProLint wrappers around the MDAnalysis AtomGroups. This means that you get to keep all the functionality of MDAnalysis AtomGroups, but you can also use the ProLint-specific functions, such as making changes to the `query` and `database` AtomGroups or accessing ProLint-specific attributes." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array(['CHOL', 'POPE', 'POPS'], dtype=object),\n", + " ['POPE', 'POPE', 'CHOL'],\n", + " array([3050]))" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "u.database.unique_resnames, u.database.get_resnames([2345, 2346, 3050]), u.database.filter_resids_by_resname([2345, 2346, 3050], 'CHOL')" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{1: 'ARG', 2: 'GLN', 3: 'ARG', 4: 'TYR', 5: 'MET'}" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "u.query.get_resnames([1, 2, 3, 4, 5], out=dict)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Modifying the query and database AtomGroups" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### `Remove` from ProLint AtomGroup" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(2956, 2764)" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Remove all residues with resname 'ARG' from the 
query\n", + "s = u.query.remove(resname='ARG')\n", + "u.query.n_atoms, s.n_atoms" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(2956, 2532)" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Remove all residues with resname 'ARG' and all residue numbers lower than 100\n", + "s = u.query.remove(resname='ARG', resnum=[*range(100)])\n", + "u.query.n_atoms, s.n_atoms" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(2956, 1543)" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# More complicated example: \n", + "# Remove all residues with resname 'ARG' and the residue number 1, and all atoms with the name 'BB' and the atomids 1-9\n", + "s = u.query.remove(resname='ARG', resnum=[1], atomname=['BB'], atomids=[1, 2, 3, 4, 5, 6, 7, 8, 9])\n", + "u.query.n_atoms, s.n_atoms" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`Important`: \n", + "1. `remove` combines all input arguments into a single selection string concatenated with `or` statements.\n", + "2. The above code returns a new ProLint AtomGroup, but does not modify the original AtomGroup. To modify the original AtomGroup, you need to use assignment. See below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2764" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "u.query = u.query.remove(resname='ARG')\n", + "u.query.n_atoms" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 13/13 [00:00<00:00, 161.23it/s]\n" + ] + } + ], + "source": [ + "# Let's compute the contacts between the modified query and the database\n", + "c = u.compute_contacts(cutoff=7)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "419" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Number of residues we have computed contacts for\n", + "len(c.contact_frames.keys())" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### `Add` to ProLint AtomGroup" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2956" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's add back the residues we removed from the query\n", + "u.query = u.query.add(resname='ARG')\n", + "u.query.n_atoms" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 13/13 [00:00<00:00, 259.69it/s]\n" + ] + } + ], + "source": [ + "# Let's compute the contacts between the query and the database\n", + "c = u.compute_contacts(cutoff=7)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "442" + ] + }, 
+ "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Number of residues we have computed contacts for\n", + "len(c.contact_frames.keys())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ufcc-dev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/notebooks/02_Universe_Properties.ipynb b/docs/notebooks/02_Universe_Properties.ipynb new file mode 100644 index 0000000..3137852 --- /dev/null +++ b/docs/notebooks/02_Universe_Properties.ipynb @@ -0,0 +1,299 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Modifying `Universe` properties" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Simulations are done at different conditions and we may be interested to represent results differently. ProLint now supports dynamically changing simulation properties. " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/danielramirez/mambaforge/envs/prolint2/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "from prolint2 import Universe\n", + "from prolint2.sampledata import GIRK" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "u = Universe(GIRK.coordinates, GIRK.trajectory)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Display the current properties of the `Universe` object:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'units': 'us',\n", + " 'normalizer': 'time_fraction',\n", + " 'unit_conversion_factor': 1e-06,\n", + " 'norm_factor': 0.08333333333333333}" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "u.params" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Most of the parameters are self-explanatory, but some of them are not. Here is a list of the parameters and their meaning:\n", + "- `units`: this is the unit we want to display the results in. \n", + "- `normalizer`: this is how we want to normalize the results. ProLint supports three types of normalization: \n", + " - `counts`: Display the raw counts (i.e., count of frame numbers)\n", + " - `actual_time`: Normalize by the true simulation time -> (dt/unit_conversion_factor)\n", + " - `time_fraction`: normalize by the total time of the simulation (i.e., numbers represent fractions of time spent in contact)\n", + "- `unit_conversion_factor`: this is the conversion factor to convert the raw counts to the desired unit.\n", + "- `norm_factor`: this is the normalization factor that is updated based on the normalization type. 
For example, if the normalization type is `time_fraction`, then this factor is `dt / totaltime`." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Compute contacts with default parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 13/13 [00:00<00:00, 137.25it/s]\n" + ] + } + ], + "source": [ + "default = u.compute_contacts(cutoff=7)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Results are contact durations measured as a fraction of the total simulation time and expressed in `us`" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[0.08333333333333333,\n", + " 0.08333333333333333,\n", + " 0.08333333333333333,\n", + " 0.08333333333333333,\n", + " 0.25]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "residue17_default_contacts = default.contacts.get(17).get('POPE')\n", + "residue17_default_contacts[:5]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Change the parameters and re-compute contacts" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'units': 'us',\n", + " 'normalizer': 'actual_time',\n", + " 'unit_conversion_factor': 1e-06,\n", + " 'norm_factor': 0.39999999999999997}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "u.normalize_by = 'actual_time' # rather than `time_fraction`, let's normalize by the total time of the trajectory\n", + "u.params" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice how `norm_factor` is updated based on the 
normalization type." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'units': 'ns',\n", + " 'normalizer': 'actual_time',\n", + " 'unit_conversion_factor': 0.001,\n", + " 'norm_factor': 400.0}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "u.units = 'ns' # let's change the units to nanoseconds\n", + "u.params" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice how `unit_conversion_factor` and `norm_factor` are updated based on the `units` parameter." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 13/13 [00:00<00:00, 237.10it/s]\n" + ] + } + ], + "source": [ + "updated = u.compute_contacts(cutoff=7)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Results are contact durations measured in their true simulation time and expressed in `ns`" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[400.0, 400.0, 400.0, 400.0, 1200.0]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "residue17_modified_contacts = updated.contacts.get(17).get('POPE')\n", + "residue17_modified_contacts[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ufcc-dev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, 
+ "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/notebooks/03_Compute_Statistics.ipynb b/docs/notebooks/03_Compute_Statistics.ipynb new file mode 100644 index 0000000..a97658d --- /dev/null +++ b/docs/notebooks/03_Compute_Statistics.ipynb @@ -0,0 +1,574 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Introduction to the ProLint Contact Interface" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`Note`: This notebook is rather lengthy and discusses the entire ProLint interface. There are different ways of doing things, and plenty of ways to extend the available functionality. It may make sense to separate this into multiple notebooks, for beginners and advanced users. " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import List, Iterable\n", + "import numpy as np\n", + "from prolint2 import Universe\n", + "from prolint2.sampledata import GIRK" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "ts = Universe(GIRK.coordinates, GIRK.trajectory)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 13/13 [00:00<00:00, 170.94it/s]\n" + ] + } + ], + "source": [ + "contacts = ts.compute_contacts(cutoff=7)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Non-formatted contact output" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# These are triply nested dictionaries containing all contact information\n", + "# contacts.contact_frames, contacts.contacts" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Computing different metrics" + ] 
+ }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "from prolint2.metrics.metrics import Metric, MeanMetric, SumMetric, MaxMetric" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Computing contact metrics is very easy" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "mean_instance = MeanMetric() # create an instance of the MeanMetric class\n", + "metric_instance = Metric(contacts, mean_instance) # feed the contacts and the above instance to the Metric class\n", + "mean_contacts = metric_instance.compute() # compute the metric" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "sum_instance = SumMetric()\n", + "metric_instance = Metric(contacts, sum_instance)\n", + "sum_contacts = metric_instance.compute()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "defaultdict(dict,\n", + " {'POPE': {'SumMetric': 0.25},\n", + " 'POPS': {'SumMetric': 0.41666666666666663}})" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sum_contacts[14]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Defining a new metric class is also very easy" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "from prolint2.metrics.base import BaseMetric # import the base class\n", + "\n", + "# contact_array are all the contacts a single residue forms for each lipid. 
ProLint will call your function `compute_metric` with this array as an argument\n", + "# For example, if you have 10 residues and 1 lipid, ProLint will call your function 10 times, each time with a contact_array consisting of all \n", + "# the contacts that residue forms with the lipid during the trajectory.\n", + "\n", + "# `compute_metric` should take an iterable (e.g. list, numpy array) as input and return a single value\n", + "\n", + "class ScaleAndMeanMetric(BaseMetric):\n", + " \"\"\" A metric that computes the mean of the contacts after scaling them by 2. \"\"\"\n", + " name: str = 'scale'\n", + " def compute_metric(self, contact_array: Iterable) -> float:\n", + " return np.mean(contact_array) * 2\n", + "\n", + "class RandomWeightedMeanMetric(BaseMetric):\n", + " \"\"\" A metric that computes the weighted mean of the contacts using random weights. \"\"\"\n", + " name: str = 'weighted_mean'\n", + " def compute_metric(self, contact_array: Iterable) -> float:\n", + " return np.average(contact_array, weights=np.random.rand(len(contact_array)))\n", + " \n", + "scale_and_mean_instance = Metric(contacts, ScaleAndMeanMetric())\n", + "scale_and_mean_contacts = scale_and_mean_instance.compute()\n", + "\n", + "weighted_mean_instance = Metric(contacts, RandomWeightedMeanMetric())\n", + "weighted_mean_contacts = weighted_mean_instance.compute()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### We also provide a class that you can use directly with your own metric function" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "from prolint2.metrics.metrics import UserDefinedMetric\n", + "\n", + "# Defining a new metric is as simple as defining a function that takes an iterable as input and returns a single value\n", + "def custom_user_function(contact_array: Iterable) -> float:\n", + " \"\"\" A custom metric that computes the mean of the contacts after scaling them by 10. 
\"\"\"\n", + " return np.mean(contact_array) * 10\n", + "\n", + "# Give your function to the UserDefinedMetric class and that's it!\n", + "user_metric_instance = UserDefinedMetric(custom_user_function)\n", + "user_metric = Metric(contacts, user_metric_instance)\n", + "user_metric_contacts = user_metric.compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### You can also choose to append results to the metric output by telling `Metric` to not clear previous results" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "metric_instance = Metric(contacts, MeanMetric()) # by default clear is True, so we clear any existing metrics\n", + "contacts_out = metric_instance.compute() # populate the metric column\n", + "\n", + "metric_instance = Metric(contacts, SumMetric(), clear=False) # set clear to False to keep the existing metrics\n", + "contacts_out = metric_instance.compute() # populate the metric column\n", + "\n", + "metric_instance = Metric(contacts, MaxMetric(), clear=False) # set clear to False to keep the existing metrics\n", + "contacts_out = metric_instance.compute() # populate the metric column" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### You can also specify a list of metrics to compute at once" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "metric_instances_list = [MeanMetric(), SumMetric(), MaxMetric()]\n", + "metric_instance = Metric(contacts, metric_instances_list) # clear is True by default so we clear any existing metrics\n", + "contacts_out = metric_instance.compute() # populate the metric columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### You can choose from different types of output formats" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "# 
DefaultOutputFormat is the default output format if no other format is specified\n", + "from prolint2.metrics.formatters import DefaultOutputFormat, SingleOutputFormat, CustomOutputFormat, ProLintDashboardOutputFormat" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "metric_instances_list = [MeanMetric(), SumMetric(), MaxMetric()]\n", + "metric_instance = Metric(contacts, metric_instances_list, output_format=CustomOutputFormat()) # gives a list of metrics matching the order of the metric_instances_list\n", + "contacts_out = metric_instance.compute() # populate the metric columns" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "# ProLintDashboardOutputFormat is used by the ProLint Dashboard and it requires the residue names and residue ids\n", + "input_dict = {\n", + " 'residue_names': ts.query.residues.resnames, \n", + " 'residue_ids': ts.query.residues.resids\n", + "}\n", + "\n", + "metric_instances_list = MeanMetric() # you can pass more than one metric instance and it works, but the format is not intended for that\n", + "metric_instance = Metric(\n", + " contacts, \n", + " metric_instances_list, \n", + " output_format=ProLintDashboardOutputFormat(**input_dict)\n", + ")\n", + "\n", + "contacts_out = metric_instance.compute()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "# If you care only for one metric, you can use the SingleOutputFormat\n", + "metric_instances_list = MeanMetric() \n", + "metric_instance = Metric(\n", + " contacts,\n", + " metric_instances_list,\n", + " output_format=SingleOutputFormat()\n", + ")\n", + "\n", + "contacts_out = metric_instance.compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### The `create_metric` function is a convenience function that creates a Metric instance and computes the metric in one step" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "from prolint2.metrics.metrics import create_metric" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "registry = ts.registry # get the registry of supported metrics" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "metric_instance = create_metric(\n", + " contacts, \n", + " metrics=['mean', 'sum', 'max'], \n", + " metric_registry=registry, \n", + " output_format='default' # default, single, custom, dashboard\n", + ")\n", + "\n", + "contacts_out = metric_instance.compute()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Using `create_metric` with a custom function" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "def custom_function(contact_array: Iterable) -> float:\n", + " return np.mean(contact_array) * 10" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "metric_instance = create_metric(\n", + " contacts, \n", + " metrics=['custom'], # we want to use our custom function\n", + " custom_function=custom_function, # pass the custom function\n", + " metric_registry=registry, \n", + " output_format='default'\n", + ")\n", + "\n", + "contacts_out = metric_instance.compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Adding our Metric classes to the registry" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['max', 'mean', 'sum', 'custom']" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# These are all the functions currently in the registry\n", + 
"registry.get_registered_names()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "# Let's add the `ScaleAndMeanMetric` metric we defined earlier to the registry\n", + "# we provide the name of the metric and the class\n", + "registry.register('scaled_mean', ScaleAndMeanMetric)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['max', 'mean', 'sum', 'custom', 'scaled_mean']" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Show all functions in the registry again to see that the new metric is there\n", + "registry.get_registered_names()" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "metric_instance = create_metric(\n", + " contacts, \n", + " metrics=['scaled_mean', 'max', 'mean'], # we can now use the new metric by referring to it by name\n", + " metric_registry=registry, \n", + " output_format='default'\n", + ")\n", + "\n", + "contacts_out = metric_instance.compute()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### You can also convert between the different output formats" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "from prolint2.metrics.converters import DefaultToSingleConverter, CustomToSingleConverter" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "# We can convert from the default output format to the single output format\n", + "metric_instance = create_metric(\n", + " contacts, \n", + " metrics=['scaled_mean', 'max', 'mean'], # we can now use the new metric by referring to it by name\n", + " metric_registry=registry, \n", + " output_format='default'\n", + ")\n", + "contacts_out = metric_instance.compute()" + ] + }, + { + 
"cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "# we can also get other metrics we've computed\n", + "extract_single_metric = DefaultToSingleConverter(contacts_out, 'scaled_mean', registry).convert().get_result() " + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "# We can convert from the custom output format to the single output format\n", + "metric_instance = create_metric(\n", + " contacts, \n", + " metrics=['scaled_mean', 'max', 'mean'], # we can now use the new metric by referring to it by name\n", + " metric_registry=registry, \n", + " output_format='custom'\n", + ")\n", + "contacts_out = metric_instance.compute()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "# we have to specify the index of the metric we want to extract\n", + "extract_single_metric = CustomToSingleConverter(contacts_out, 0, registry).convert().get_result()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ufcc-dev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/notebooks/04_Residence_Times.ipynb b/docs/notebooks/04_Residence_Times.ipynb new file mode 100644 index 0000000..501048b --- /dev/null +++ b/docs/notebooks/04_Residence_Times.ipynb @@ -0,0 +1,143 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 
Modular Implementation of Residence Time Calculations" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ProLint currently supports an early version of residence time calculation. This notebook shows how you can compute residence time. " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/danielramirez/mambaforge/envs/prolint2/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "from prolint2 import Universe\n", + "from prolint2.metrics.restime import KoffCalculator\n", + "from prolint2.sampledata import GIRK" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 13/13 [00:00<00:00, 108.92it/s]\n" + ] + } + ], + "source": [ + "u = Universe(GIRK.coordinates, GIRK.trajectory)\n", + "u.normalize_by = 'actual_time' # We need to use the true time for the normalization\n", + "c = u.compute_contacts(cutoff=7)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "4.8 0.4\n" + ] + } + ], + "source": [ + "# Next we need to define the parameters for the calculation\n", + "totaltime = u.trajectory.totaltime * u.params['unit_conversion_factor']\n", + "timestep = round(u.trajectory.dt * u.params['unit_conversion_factor'], 4)\n", + "print (totaltime, timestep)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`Residence Time` for residue 401 with cholesterol is computed as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 
4, + "metadata": {}, + "outputs": [], + "source": [ + "# Get the data for residue 401 with CHOL\n", + "data = c.contacts[401]['CHOL']" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2.5895888515222234 0.3861616871775516\n" + ] + } + ], + "source": [ + "r401_chol = KoffCalculator(data, totaltime, timestep, fitting_func_name='bi_expo')\n", + "print (r401_chol.koff, r401_chol.res_time)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ufcc-dev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/notebooks/05_Contact_Arithmetic.ipynb b/docs/notebooks/05_Contact_Arithmetic.ipynb new file mode 100644 index 0000000..df32cb0 --- /dev/null +++ b/docs/notebooks/05_Contact_Arithmetic.ipynb @@ -0,0 +1,300 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Building Complex Contact Schema" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "One of the cool things about the new design of ProLint is that you can build very complex contact representations, similar to building blocks. We showcase a few examples here which we think are useful and interesting."
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/danielramirez/mambaforge/envs/prolint2/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "from prolint2 import Universe\n", + "from prolint2.sampledata import GIRK" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "u = Universe(GIRK.coordinates, GIRK.trajectory)\n", + "u.normalize_by = 'actual_time' # We will use true time normalization, though any of the available options will work just as well" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "### Compute contacts at 3 different cutoffs" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 13/13 [00:00<00:00, 104.49it/s]\n", + "100%|██████████| 13/13 [00:00<00:00, 280.49it/s]\n", + "100%|██████████| 13/13 [00:00<00:00, 229.45it/s]\n" + ] + } + ], + "source": [ + "c1 = u.compute_contacts(cutoff=6)\n", + "c2 = u.compute_contacts(cutoff=7)\n", + "c3 = u.compute_contacts(cutoff=8)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Multiple Cutoffs" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you would like to use a `Double cutoff` as a damping layer, which will allow you to measure contact durations more accurately, then this can be very easily done with a very intuitive syntax:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "c3 
= c1.intersection(c2)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.8, 0.4, 0.4]\n" + ] + } + ], + "source": [ + "# Let's again look at residue 401 with CHOL\n", + "r401_chol = sorted(c3.contacts[401]['CHOL'], reverse=True)\n", + "print([round(x, 3) for x in r401_chol])" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Why this works" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Above we are computing the intersection between `c1` and `c2`, and that is all there is to it. The reasoning behind this is quite simple. Cutoffs are radial, which in our case has the physical interpretation of $c1 \\subseteq c2$. That is, the contacts with the smaller cutoff are a subset of the contacts with the larger cutoff. This is a very intuitive way of thinking about it, and it allows us to build very complex contact representations with ease, by noting the following: " + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If `c1` and `c2` are sets of numbers, and `c1` is a subset of `c2` (i.e., $c1 \\subseteq c2$), this relationship can be reflected in the following common set operations:\n", + "\n", + "1. Union ($c1 \\cup c2$): Since $c1$ is a subset of $c2$, the union of $c1$ and $c2$ will be equal to $c2$. This is because all elements of $c1$ are already present in $c2$, so the union will not add any new elements to the resulting set.\n", + "\n", + "2. Intersection ($c1 \\cap c2$): The intersection of $c1$ and $c2$ will be equal to $c1$. This is because all elements of $c1$ are present in $c2$, so the intersection will include all elements of $c1$.\n", + "\n", + "3. Difference ($c2 - c1$): The difference of $c2$ and $c1$ will be the set of elements that are present in $c2$ but not in $c1$.
Since $c1$ is a subset of $c2$, the difference will include all the elements unique to $c2$.\n", + "\n", + "4. Difference ($c1 - c2$): The difference of $c1$ and $c2$ will be the empty set ($\\emptyset$), since all elements of $c1$ are present in $c2$. There are no elements unique to $c1$ that are not in $c2$.\n", + "\n", + "5. Symmetric Difference ($c1 \\bigtriangleup c2$): The symmetric difference of $c1$ and $c2$ will also be equal to the difference ($c2 - c1$). This is because symmetric difference includes elements that are unique to one of the sets, and since $c1$ is a subset of $c2$, all unique elements are in $c2$ but not in $c1$.\n", + "\n", + "In summary, for sets $c1$ and $c2$ where $c1 \\subseteq c2$:\n", + "\n", + "- $c1 \\cup c2 = c2$\n", + "- $c1 \\cap c2 = c1$\n", + "- $c2 - c1 = \\text{elements unique to } c2$\n", + "- $c1 - c2 = \\emptyset$\n", + "- $c1 \\bigtriangleup c2 = c2 - c1$\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In practice this results in a trivially easy way to construct complex contact schemas: " + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 13/13 [00:00<00:00, 112.28it/s]\n", + "100%|██████████| 13/13 [00:00<00:00, 158.98it/s]\n", + "100%|██████████| 13/13 [00:00<00:00, 122.64it/s]\n" + ] + } + ], + "source": [ + "# Compute more contacts at different cutoffs\n", + "c3 = u.compute_contacts(cutoff=8)\n", + "c4 = u.compute_contacts(cutoff=9)\n", + "c5 = u.compute_contacts(cutoff=10)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "_ = c1 + c2 # Double cutoff \n", + "_ = c1 + c2 + c3 # Triple cutoff\n", + "_ = c1 + c2 + c3 + c4 # Quadruple cutoff\n", + "_ = c1 + c2 + c3 + c4 + c5 # Quintuple cutoff" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ 
+ "This way you can, for example, look at how contacts with different lipids fall off as a function of the cutoff." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Annular Shell Contact" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also go in the other direction, and construct a donut-shaped contact schema, where we have a core of contacts with a smaller cutoff, and a shell of contacts with a larger cutoff, and we subtract/remove the core from the shell:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c2 - c1 # or c2.difference(c1)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Such an analysis can be very useful when studying lipid-protein interactions where the concept of annular lipids is very important. With the logic and syntax developed here, we can very easily construct a contact schema that will allow us to study the annular lipids as a function of the cutoff."
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "first_shell = c2 - c1 # or c2.difference(c1)\n", + "second_shell = c3 - c2 # or c3.difference(c2)\n", + "third_shell = c4 - c3 # or c4.difference(c3)\n", + "fourth_shell = c5 - c4 # or c5.difference(c4)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ufcc-dev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/notebooks/Getting_Started.ipynb b/docs/notebooks/Getting_Started.ipynb new file mode 100644 index 0000000..0f383cf --- /dev/null +++ b/docs/notebooks/Getting_Started.ipynb @@ -0,0 +1,663 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Welcome to ProLint2" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ProLint2 is a tool for the analysis of protein-lipid interactions. It has been completely rewritten and now includes many new features: \n", + "\n", + "1. Orders of magnitude faster than the original ProLint\n", + "2. Modular design for easy extension\n", + "3. Completely new visualization front end\n", + "4. Many other new features" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### How to install the package" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Please read the [README](https://github.com/ProLint/prolint2/blob/main/README.md) file for instructions on how to install ProLint2." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### How to get started" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/danielramirez/mambaforge/envs/prolint2/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "from prolint2 import Universe" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ProLint now is built on top of MDAnalysis. We provide a wrapper class around MDAnalysis.Universe that allows you to load file and perform analysis the same way as you would with MDAnalysis. The Prolint Universe object, however, has additional methods that allow you to perform analysis on protein-lipid interactions. \n", + "\n", + "```python\n", + "from prolint2 import Universe\n", + "u = Universe('coordinates.gro', 'trajectory.xtc')\n", + "```\n", + "\n", + "And that's it! You can now use the `u` object to perform analysis. This is exactly the same as you would with MDAnalysis. For example, to get the center of mass of the protein, you can do:\n", + "\n", + "```python\n", + "u.select_atoms('protein').center_of_mass()\n", + "```\n", + "\n", + "Of course, the reason to use ProLint is to analyze protein-lipid interactions. So let's see how we can do that. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from prolint2.sampledata import GIRK\n", + "u = Universe(GIRK.coordinates, GIRK.trajectory)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of frames: 13, number of atoms: 23820\n" + ] + } + ], + "source": [ + "n_frames = u.trajectory.n_frames\n", + "n_atoms = u.atoms.n_atoms\n", + "\n", + "print (f'Number of frames: {n_frames}, number of atoms: {n_atoms}')" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The `query` and `database` terminology\n", + "\n", + "ProLint computes contacts between a reference group of atoms (usually the protein) that we call the `query`, and another group of atoms (usually the lipids) that we call the `database`. The query group is the group of atoms/residues that you want to analyze. The database group is the group of atoms that you want to analyze their interactions with. For example, if you want to analyze the interactions between a protein and surrounding lipids, the protein is the query and the lipids are the database group. \n", + "\n", + "When you create a `Universe` object ProLint will use proteins as the query and all other atoms as the database. 
You can access them by their attributes:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(>,\n", + " >)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "u.query, u.database" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of query atoms: 2956, number of database atoms: 20864\n" + ] + } + ], + "source": [ + "n_query_atoms = u.query.n_atoms\n", + "n_database_atoms = u.database.n_atoms\n", + "\n", + "print (f'Number of query atoms: {n_query_atoms}, number of database atoms: {n_database_atoms}')" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice how the `query` and `database` attributes return ProLint wrapper objects around the MDAnalysis.AtomGroup objects. This allows us to perform analysis on the query and database groups: " + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(Counter({'ARG': 64,\n", + " 'GLN': 40,\n", + " 'TYR': 40,\n", + " 'MET': 48,\n", + " 'GLU': 112,\n", + " 'LYS': 56,\n", + " 'THR': 96,\n", + " 'GLY': 76,\n", + " 'CYS': 32,\n", + " 'ASN': 52,\n", + " 'VAL': 104,\n", + " 'HIS': 28,\n", + " 'LEU': 120,\n", + " 'SER': 72,\n", + " 'ASP': 56,\n", + " 'PHE': 88,\n", + " 'TRP': 28,\n", + " 'ILE': 88,\n", + " 'ALA': 52,\n", + " 'PRO': 32}),\n", + " Counter({'POPE': 652, 'POPS': 652, 'CHOL': 652}),\n", + " 'POPE')" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "u.query.resname_counts, u.database.resname_counts, u.database.get_resname(2345)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Computing contacts" + ] + }, + { + "attachments": {}, + "cell_type":
"markdown", + "metadata": {}, + "source": [ + "We make it now very easy and extremely fast to compute contacts between the query and database groups. To compute contacts, you can use the `compute_contacts` method of the `Universe` object. This method takes the following arguments:\n", + "- `cutoff`: The cutoff distance to use for computing contacts. Units are in Angstroms.\n", + "- `backend`: The backend to use for computing contacts. Currently, this option is not used and the default backend is used. In the future, we will add more backends for computing contacts." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 13/13 [00:00<00:00, 119.13it/s]\n" + ] + } + ], + "source": [ + "contacts = u.compute_contacts(cutoff=7) # cutoff in Angstroms" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + "
0123456789101112
ResidueIDLipidId
124820001000000000
26720000000001000
26810100000000000
27680100000000000
1026480000000001000
.............................................
125924630100000000000
27550100000000000
27601000000000000
126124630100000000000
126324680000000010000
\n", + "

5680 rows × 13 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 7 8 9 10 11 12\n", + "ResidueID LipidId \n", + "1 2482 0 0 0 1 0 0 0 0 0 0 0 0 0\n", + " 2672 0 0 0 0 0 0 0 0 0 1 0 0 0\n", + " 2681 0 1 0 0 0 0 0 0 0 0 0 0 0\n", + " 2768 0 1 0 0 0 0 0 0 0 0 0 0 0\n", + "10 2648 0 0 0 0 0 0 0 0 0 1 0 0 0\n", + "... .. .. .. .. .. .. .. .. .. .. .. .. ..\n", + "1259 2463 0 1 0 0 0 0 0 0 0 0 0 0 0\n", + " 2755 0 1 0 0 0 0 0 0 0 0 0 0 0\n", + " 2760 1 0 0 0 0 0 0 0 0 0 0 0 0\n", + "1261 2463 0 1 0 0 0 0 0 0 0 0 0 0 0\n", + "1263 2468 0 0 0 0 0 0 0 0 1 0 0 0 0\n", + "\n", + "[5680 rows x 13 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# This may take a few seconds because pandas is slow\n", + "df = contacts.create_dataframe(n_frames)\n", + "df " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that the dataframe itself is very lightweight" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "MultiIndex: 5680 entries, (1, 2482) to (1263, 2468)\n", + "Data columns (total 13 columns):\n", + " # Column Non-Null Count Dtype\n", + "--- ------ -------------- -----\n", + " 0 0 5680 non-null int8 \n", + " 1 1 5680 non-null int8 \n", + " 2 2 5680 non-null int8 \n", + " 3 3 5680 non-null int8 \n", + " 4 4 5680 non-null int8 \n", + " 5 5 5680 non-null int8 \n", + " 6 6 5680 non-null int8 \n", + " 7 7 5680 non-null int8 \n", + " 8 8 5680 non-null int8 \n", + " 9 9 5680 non-null int8 \n", + " 10 10 5680 non-null int8 \n", + " 11 11 5680 non-null int8 \n", + " 12 12 5680 non-null int8 \n", + "dtypes: int8(13)\n", + "memory usage: 135.5 KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The output dataframe has two indices: \n", + "1. Residue IDs of the query group\n", + "2. 
def get_lipids_by_residue_id(df: pd.DataFrame, residue_id: int) -> list:
    """Return every LipidId paired with ``residue_id`` in the contact frame.

    ``df`` is indexed by (ResidueID, LipidId); selecting the outer level
    leaves the LipidId level as the index of the result.

    Raises
    ------
    KeyError
        If ``residue_id`` does not appear in the index.
    """
    return df.loc[residue_id].index.tolist()


def get_residues_by_lipid_id(df: pd.DataFrame, lipid_id: int) -> list:
    """Return every ResidueID paired with ``lipid_id`` in the contact frame.

    Raises
    ------
    KeyError
        If ``lipid_id`` does not appear in the ``LipidId`` index level.
    """
    return df.xs(lipid_id, level='LipidId', axis=0).index.tolist()


def get_contact_data(
    df: pd.DataFrame, residue_id: int, lipid_id: int, output: str = 'contacts'
) -> np.ndarray:
    """Return the per-frame contact row for one (residue, lipid) pair.

    Parameters
    ----------
    df : pd.DataFrame
        Contact frame indexed by (ResidueID, LipidId), one column per frame.
    residue_id, lipid_id : int
        The index pair to look up.
    output : {'contacts', 'indices'}
        ``'contacts'`` returns the raw row as an array; ``'indices'`` returns
        the positions of its non-zero entries.

    Raises
    ------
    ValueError
        If ``output`` is not one of the two supported modes.
    KeyError
        If the (residue_id, lipid_id) pair is not in the index.
    """
    # Reject typos such as 'index' instead of silently returning the raw row.
    if output not in ('contacts', 'indices'):
        raise ValueError(
            f"output must be 'contacts' or 'indices', got {output!r}"
        )

    contact_array = df.loc[(residue_id, lipid_id)].to_numpy()
    if output == 'indices':
        return np.nonzero(contact_array)[0]
    return contact_array
"attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that these functions are also available from `contacts` instance created above, and they are faster compared to using the DataFrame. The idea here is that you can use the DataFrame to perform any analysis you want, since as mentioned above it provides a complete description of your system. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the next notebook, we will look at how you can modify the query and database to get more customized results." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ufcc-dev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/prolint2/__init__.py b/prolint2/__init__.py index 48141e3..356a852 100644 --- a/prolint2/__init__.py +++ b/prolint2/__init__.py @@ -2,7 +2,8 @@ # Add imports here import os -from .prolint2 import * +from .core import Universe + from .interactive_sel import * # Handle versioneer @@ -16,11 +17,9 @@ # to get the paths relative to the root of the package _ROOT = os.path.abspath(os.path.dirname(__file__)) - def get_data(): return os.path.join(_ROOT, "data") - # to get the path to the config file def get_config(): return os.path.join(_ROOT, "config.ini") diff --git a/prolint2/computers/__init__.py b/prolint2/computers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/prolint2/computers/base.py b/prolint2/computers/base.py new file mode 100644 index 0000000..54ef64c --- /dev/null +++ b/prolint2/computers/base.py @@ -0,0 +1,20 @@ +from abc import ABC, abstractmethod +from MDAnalysis.analysis.base import AnalysisBase + +class 
class SerialContacts(ContactComputerBase):
    r"""
    Serial, distance-based contact search between two AtomGroups.

    For every analysed frame, each (query residue id, database residue id)
    pair found within ``cutoff`` is recorded once in ``contact_frames`` as
    ``contact_frames[residue_id][lipid_id] -> [frame_index, ...]``.

    It inherits from the MDAnalysis AnalysisBase class (via
    ``ContactComputerBase``).
    """
    def __init__(self, universe, query, database, cutoff, **kwargs):

        super().__init__(universe.universe.trajectory, **kwargs)

        self.query, self.database, self.cutoff = query, database, cutoff

        # Per-atom lookup tables, indexed by the atom positions that the
        # neighbour search returns.
        self.q_resids = self.query.resids
        self.db_resids = self.database.resids
        self.db_resnames = self.database.resnames

        self.contacts = None
        self.contact_frames = defaultdict(lambda: defaultdict(list))

        self._validate_inputs()

    def _validate_inputs(self):
        """
        Reject empty selections and non-positive cutoffs with ``ValueError``.
        """
        query_is_empty = len(self.query) == 0
        if query_is_empty or len(self.database) == 0:
            raise ValueError("Invalid selection. Empty AtomGroup(s).")

        if self.cutoff <= 0:
            raise ValueError("The cutoff must be greater than 0.")

    def _get_residue_lipid_info(self, pair):
        """
        Translate one (query index, database index) pair into
        ``(residue_id, lipid_id, lipid_name)``.
        """
        query_index = pair[0]
        database_index = pair[1]
        return (
            self.q_resids[query_index],
            self.db_resids[database_index],
            self.db_resnames[database_index],
        )

    def _compute_pairs(self):
        """
        Return the index pairs (query atom, database atom) that lie within
        the cutoff in the current frame, using a periodic grid search.
        """
        grid = FastNS(
            self.cutoff, self.database.positions, box=self.database.dimensions, pbc=True
        )
        return grid.search(self.query.positions).get_pairs()

    def _single_frame(self):
        """
        Record the current frame index for every unique residue-lipid contact.
        """
        pairs = self._compute_pairs()
        query_idx, database_idx = pairs[:, 0], pairs[:, 1]

        residue_ids, lipid_ids, lipid_names = fast_unique_comparison(
            self.q_resids[query_idx],
            self.db_resids[database_idx],
            self.db_resnames[database_idx],
        )

        # A residue/lipid pair is stored at most once per frame, even if the
        # helper above still yields it more than once.
        seen = set()
        for residue_id, lipid_id, _ in zip(residue_ids, lipid_ids, lipid_names):
            key = (residue_id, lipid_id)
            if key in seen:
                continue
            seen.add(key)
            self.contact_frames[residue_id][lipid_id].append(self._frame_index)

    # NOTE: no ``_conclude`` hook is defined, so ``self.contacts`` stays
    # ``None`` after a run; results are read from ``contact_frames``.
class ServerPayload:
    """Class that provides the data for the dashboard.

    The payload is built once, at construction time, and exposed through
    :attr:`payload` / :meth:`get_payload`.

    Parameters
    ----------
    contacts : :class:`ContactsProvider`
        The contacts provider object.
    ts : :class:`Universe`
        The universe object. Its ``registry``, ``database``, ``query`` and
        ``trajectory`` attributes are read here.

    """
    def __init__(self, contacts, ts):
        self.contacts = contacts

        self.registry = ts.registry

        self.database_resnames = ts.database.unique_resnames.tolist()
        self.database_resname_counts = ts.database.resname_counts
        self.residue_names = ts.query.residues.resnames
        self.residue_ids = ts.query.residues.resids

        # Kept on the instance for callers; ``residue_contacts`` takes its
        # own ``dt`` / ``totaltime`` arguments and does not read these.
        self.dt = ts.trajectory.dt
        self.totaltime = ts.trajectory.totaltime

        self.ordered_lipid_names = list(self.database_resname_counts.keys())

        self._compute()

    def residue_contacts(self, lipid_type: str = None, metric="sum", dt=1, totaltime=1):
        """Compute residue contacts in the ``dashboard`` output format.

        Parameters
        ----------
        lipid_type : str, optional
            Lipid name to report on. Defaults to the first entry of
            ``ordered_lipid_names``.
        metric : str
            Name of a metric known to ``self.registry``.
        dt, totaltime :
            Forwarded unchanged to the metric's ``compute`` call.
        """
        selected_lipid = self.ordered_lipid_names[0] if lipid_type is None else lipid_type
        metric_instance = create_metric(
            self.contacts,
            metrics=[metric],
            metric_registry=self.registry,
            output_format="dashboard",
            lipid_type=selected_lipid,
            residue_names=self.residue_names,
            residue_ids=self.residue_ids,
        )
        return metric_instance.compute(dt=dt, totaltime=totaltime)

    def _compute(self, lipid_type: str = None, metric="sum"):
        """Build ``self._payload`` from the contacts and lipid composition."""
        # TODO: the protein name is hard-coded. Read protein name(s)
        # dynamically and handle multiple identical / different proteins.
        protein_name = "Protein"
        proteins = [protein_name]
        protein_counts = {protein_name: 1}

        residue_contacts = self.residue_contacts(lipid_type=lipid_type, metric=metric)

        # Fraction of each lipid type in the database, formatted for the pie chart.
        lipid_counts = self.database_resname_counts
        total_lipid_sum = sum(lipid_counts.values())
        sub_data = [
            {"category": lipid, "value": "{:.2f}".format(count / total_lipid_sum)}
            for lipid, count in lipid_counts.items()
        ]

        # Each slice is labelled with the protein being iterated, so the
        # chart stays correct once ``proteins`` holds more than one entry.
        total_protein_count = sum(protein_counts.values())
        pie_data = [
            {
                "category": protein,
                "value": "{:.2f}".format(protein_counts[protein] / total_protein_count),
                "subData": sub_data,
            }
            for protein in proteins
        ]

        self._payload = {
            # Contacts are only computed once, for the single hard-coded protein.
            "data": {protein_name: residue_contacts},
            "proteins": proteins,
            "lipids": self.ordered_lipid_names,
            "pie_data": pie_data,  # TODO: include protein info
        }

    @property
    def payload(self):
        """The payload."""
        return self._payload

    def get_payload(self):
        """Return the payload."""
        return self.payload
Sejdiu -:Year: 2022 -:Copyright: MIT License -""" - -import os -import pandas as pd -import numpy as np -import MDAnalysis as mda -from collections import Counter -from collections import namedtuple -from collections import OrderedDict -from itertools import groupby -from MDAnalysis.lib.nsgrid import FastNS -from MDAnalysis.analysis.base import AnalysisBase -from MDAnalysis.analysis import distances -import configparser - -# Getting the config file -config = configparser.ConfigParser(allow_no_value=True) -config.read(os.path.join(os.path.abspath(os.path.dirname(__file__)), "config.ini")) -parameters_config = config["Parameters"] - - -class SerialContacts(AnalysisBase): - r""" - Class to get the distance-based contacts starting from two AtomGroups - using a *serial* approach. - - It inherits from the MDAnalysis AnalysisBase class. - """ - # TODO: - # @bis: The front end has the hierarch protein -> lipids -> residue - # The data, however, are stored protein -> residue -> lipids, leading to unnecessary - # work later on. We should modify this, so we store data in the right - # hierarchical structure. - def __init__(self, universe, query, database, cutoff, **kwargs): - - super().__init__(universe.universe.trajectory, **kwargs) - self.query = query - self.database = database - self.cutoff = cutoff - - # We need to convert to list to allow for JSON serialization - self.q_resids = self.query.resids.tolist() - self.db_resids = self.database.resids.tolist() - self.db_resnames = self.database.resnames - self.dp_resnames_unique = np.unique(self.db_resnames) - - # Raise if selection doesn't exist - if len(self.query) == 0 or len(self.database) == 0: - raise ValueError("Invalid selection. 
Empty AtomGroup(s).") - - if self.cutoff <= 0: - raise ValueError("The cutoff must be greater than 0.") - - def _prepare(self): - self.contacts = { - k: {v: [] for v in self.dp_resnames_unique} - for k in [x for x in self.q_resids] - } - self.contact_frames = {} - - def _single_frame(self): - gridsearch = FastNS( - self.cutoff, self.database.positions, box=self.database.dimensions, pbc=True - ) - result = gridsearch.search(self.query.positions) - pairs = result.get_pairs() - - existing_pairs = {} - for p in pairs: - residue_id = self.q_resids[p[0]] - lipid_id = self.db_resids[p[1]] - string = f"{residue_id},{lipid_id}" - - # if self._frame_index == 0 and residue_id < 200: - # print (p[0], p[1], residue_id, lipid_id) - # NOTE: - # We want to keep track of frames the cutoff is satisfied - # and also the pairs that satisfied the cutoff -> this can be used to avoid - # the distance array analysis necessary later. - # frame_pairs = (self._frame_index, p) - # if string in self.contact_frames: - # self.contact_frames[string].append(frame_pairs) - # else: - # self.contact_frames[string] = [frame_pairs] - - if f"{residue_id}{lipid_id}" in existing_pairs: - continue - existing_pairs[f"{residue_id}{lipid_id}"] = True - - # TODO: - # @bis: we may be able to get further performance improvements by - # using the Counter object with its update methods. - - # TODO: - # these IDs are not guaranteed to be unique: - # For systems containing multiple proteins - # For very large systems with duplicate lipid residue IDs (e.g. two instances of 1234CHOL) - lipid_name = self.db_resnames[p[1]] - self.contacts[residue_id][lipid_name].append(lipid_id) - - # NOTE: - # We want to keep track of frames the cutoff is satisfied - # the self.contact_frames dict gets very large and may not be feasible for large systems. - # In general, it's not a method that's going to scale well. Given the backend we have, it - # makes more sense to store results in a temporary SQL database. 
Retrieval will be superfast, - # and we can do much more that way. - if string in self.contact_frames: - self.contact_frames[string].append(self._frame_index) - else: - self.contact_frames[string] = [self._frame_index] - - def _conclude(self): - self.contacts = dict( - map( - lambda x: ( - x[0], - dict(map(lambda y: (y[0], Counter(y[1])), x[1].items())), - ), - self.contacts.items(), - ) - ) - - -class SerialDistances(AnalysisBase): - r""" - Class to get the distance-based contacts starting from two AtomGroups - using a *serial* approach. - - It inherits from the MDAnalysis AnalysisBase class. - """ - # TODO: - # @bis: The front end has the hierarch protein -> lipids -> residue - # The data, however, are stored protein -> residue -> lipids, leading to unnecessary - # work later on. We should modify this, so we store data in the right - # hierarchical structure. - def __init__( - self, universe, query, database, lipid_id, residue_id, frame_filter, **kwargs - ): - - super().__init__(universe.universe.trajectory, **kwargs) - self.query = query - self.database = database - self.frame_filter = frame_filter - frame_range = np.arange(len(self.frame_filter)) - self.frame_mapping = {k: v for k, v in zip(self.frame_filter, frame_range)} - - self.lipid_atomgroup = self.database.select_atoms(f"resid {lipid_id}") - self.resid_atomgroup = self.query.select_atoms(f"resid {residue_id}") - self.lipid_atomnames = self.lipid_atomgroup.names.tolist() - self.resid_atomnames = self.resid_atomgroup.names.tolist() - - # Raise if selection doesn't exist - if len(self.query) == 0 or len(self.database) == 0: - raise ValueError("Invalid selection. 
Empty AtomGroup(s).") - - def _prepare(self): - self.result_array = np.zeros( - ( - len(self.frame_filter), - self.lipid_atomgroup.n_atoms, - self.resid_atomgroup.n_atoms, - ) - ) - - def _single_frame(self): - if self._frame_index in self.frame_filter: - r = distances.distance_array( - self.lipid_atomgroup.positions, - self.resid_atomgroup.positions, - box=self.database.universe.dimensions, - ) - # print ('frame iterator: ', self._frame_index) - self.result_array[self.frame_mapping[self._frame_index]] = r - - def _conclude(self): - self.distance_array = np.mean(self.result_array, axis=0) - del self.result_array - - -class Contacts(object): - """Stores information to run and analyze the distance-based contacts results - between the :class:`.prolint2.QueryProteins` and :class:`.prolint2.MembraneDatabase` groups. - - Parameters - ---------- - query : :class:`QueryProteins` - database : :class:`MembraneDatabase` - - Attributes - ---------- - query : :class:`QueryProteins` - **Query** group to use during the calculation of the contacts. - database : :class:`MembraneDatabase` - **Database** group to use during the calculation of the contacts. - contacts : Array (None) - Numpy uni-dimensional array of shape equal to the number of frames used during the calculation of the contacts. - Each element of the array has a Scipy matrix with the pairs (i, j) defining the contacts, where *i* is the index - of the residue in the **query** group, and *j* is the index of the residue in the **database** group. It can be populated - using either the **compute()** or the **load()** methods. - counts : Pandas DataFrame (None) - Pandas DataFrame with the counted contacts. It is populated using the **count_contacts()** method. 
- """ - - def __init__(self, query, database): - self.query = query - self.database = database - self.residue_names = self.query.selected.residues.resnames - self.residue_ids = self.query.selected.residues.resids - self.cutoff = None - self.contacts = None - self.contact_frames = None - self.metrics = None - - # TODO: - # @bis: I really don't like how we have to back reference the trajectory here - # What's the best way here? Include trajectory as an initialization argument? - self.n_frames = query.selected.universe.trajectory.n_frames - self.dt = self.query.selected.universe.trajectory.dt - self.totaltime = self.query.selected.universe.trajectory.totaltime - - def compute(self, cutoff=int(parameters_config["cutoff"]), get_metrics=False): - """ - Compute the cutoff distance-based contacts using a cythonized version of a cell-list algorithm. - - Parameters - ---------- - cutoff : int (7) - Value in Angstrom to be used as cutoff for the calculation of the contacts. - """ - self.cutoff = cutoff - assert isinstance( - self.query.selected, - (mda.core.groups.AtomGroup), - ), "the query has to be an AtomGroup" - assert isinstance( - self.database.selected, - (mda.core.groups.AtomGroup), - ), "the database has to be an AtomGroup" - temp_instance = SerialContacts( - self.query.selected.universe, - self.query.selected, - self.database.selected, - cutoff, - ) - temp_instance.run(verbose=True) - - self.contacts = temp_instance.contacts - self.contact_frames = temp_instance.contact_frames - if get_metrics: - self.metrics = self.contacts_to_metrics() - - # this functions allows the definition of chunks of frames with uninterrupted interactions - # i.e. 
it takes a list of frames as [9, 11, 12] and it returns [1, 2] - def ranges(self, lst): - pos = (j - i for i, j in enumerate(lst)) - t = 0 - for i, els in groupby(pos): - l = len(list(els)) - el = lst[t] - t += l - yield len(range(el, el + l)) - - def contacts_to_dataframe(self): - """ - Convert the contacts dictionary to a Pandas DataFrame. - - Returns - ------- - Pandas DataFrame - Pandas DataFrame with all the contacts. - """ - if not self.contacts: - raise ValueError("The contacts dictionary is empty.") - else: - results = [] - keys = self.contacts.keys() - for idx, protein_resi in enumerate(keys): - for lip_type in self.contacts[protein_resi].keys(): - for lip_res, t_frames in self.contacts[protein_resi][ - lip_type - ].items(): - for fr in self.contact_frames[ - "{},{}".format(protein_resi, lip_res) - ]: - results.append( - ( - "Protein1", - protein_resi, - self.query.selected.residues[idx].resname, - lip_type, - lip_res, - fr, - ) - ) - results_df = pd.DataFrame( - results, - columns=[ - "Protein", - "Residue ID", - "Residue Name", - "Lipid Type", - "Lipid ID", - "Frame", - ], - ) - return results_df - - def contacts_to_metrics(self): - """ - Convert the contacts dictionary to a Pandas DataFrame with different metrics. - - Returns - ------- - Pandas DataFrame - Pandas DataFrame with different metrics for the contacts. 
- """ - if not self.contacts: - raise ValueError("The contacts dictionary is empty.") - else: - metrics = [] - keys = self.contacts.keys() - for idx, protein_resi in enumerate(keys): - for lip_type in self.contacts[protein_resi].keys(): - for lip_res, t_frames in self.contacts[protein_resi][ - lip_type - ].items(): - # getting chunks of frames with uninterrupted interactions - key = "{},{}".format(protein_resi, lip_res) - temp = list(self.ranges(self.contact_frames[key])) - - # calculating metrics - metrics.append( - ( - "Protein1", - protein_resi, - self.residue_names[idx], - lip_type, - lip_res, - t_frames, - t_frames / self.n_frames, - max(temp), - np.mean(temp), - ) - ) - metrics_df = pd.DataFrame( - metrics, - columns=[ - "Protein", - "Residue ID", - "Residue Name", - "Lipid Type", - "Lipid ID", - "Sum of all contacts", - "Occupancy", - "Longest Duration", - "Mean Duration", - ], - ) - return metrics_df - - - def export(self, filename): - """ - Export the contacts array to a file. - - Parameters - ---------- - filename : str - Name of the file to export the contacts array. - """ - print("Exporting contacts and metrics to files...") - self.contacts_to_dataframe().to_csv(filename, index=False) - if not isinstance(self.metrics, pd.DataFrame): - self.contacts_to_metrics().to_csv(filename.replace(".csv", "_metrics.csv"), index=False) - print("Contacts successfully exported to file '{}' and metrics to '{}'!!".format(filename, filename.replace(".csv", "_metrics.csv"))) - - def filter_by_percentile(self, percentile=0.75, metric="Sum of all contacts"): - """ - Filter the contacts by percentile. - - Parameters - ---------- - percentile : float (0.75) - Percentile to be used for filtering the contacts array. 
- """ - if metric not in [ - "Sum of all contacts", - "Occupancy", - "Longest Duration", - "Mean Duration", - ]: - raise ValueError("The metric is not valid.") - else: - return self.metrics[ - self.metrics[metric] > self.metrics[metric].quantile(percentile) - ] - - def server_payload(self, metric="Sum of all contacts"): - - # TODO: - # protein name is hardcoded -> read protein name(s) dynamically - # update code to handle multiple identical proteins - # update code to handle multiple copies of different proteins - protein_name = "Protein" - protein = protein_name # TODO: we'll need to update this into a list and iterate over it - lipids = list(np.unique(self.database.selected.resnames)) - sub_data = { - k: {"category": k, "value": 0} for k in lipids - } # TODO: we need to generate sub_data for each protein. - js = {protein: {k: [] for k in lipids}} - - if not isinstance(self.metrics, pd.DataFrame): - self.metrics = self.contacts_to_metrics() - # get dictionary metrics - metric_dict = ( - self.metrics.groupby(["Residue ID", "Lipid Type"])[metric] - .count() - .reset_index() - ) - metric_dict = ( - pd.pivot_table( - metric_dict, index=["Residue ID"], values=metric, columns=["Lipid Type"] - ) - .fillna(0) - .to_dict("index") - ) - - for res in self.residue_ids: - if res not in metric_dict.keys(): - metric_dict[res] = {k: 0 for k in lipids} - - # [Resid: {'Lip A': 2, 'Lip B': 3}] - metric_dict = dict(OrderedDict(sorted(metric_dict.items(), key=lambda x: x[0]))) - - for idx, contact_counter in enumerate(metric_dict.values()): - for lipid, contact_sum in contact_counter.items(): - sub_data[lipid]["value"] += contact_sum - metric_transformation = ( - contact_sum * self.dt - ) / self.totaltime # TODO: do we have to substract 1 frame here? 
- if not metric_transformation > 0: - continue - - js[protein][lipid].append( - { - "residue": f"{self.residue_names[idx]} {self.residue_ids[idx]}", - "value": float("{:.2f}".format(metric_transformation)), - } - ) - - sub_data = list(sub_data.values()) - norm_with = sum([x["value"] for x in sub_data]) - sub_data = [ - { - "category": d["category"], - "value": "{:.2f}".format(d["value"] / norm_with), - } - for d in sub_data - ] - - # return js, {protein: sub_data} - - # TODO: - # Hardcoded - proteins = [protein_name] - protein_counts = {protein_name: 1} - - pie_data = [] - for protein in proteins: - value = protein_counts[protein] / sum(protein_counts.values()) - - protein_pdata = { - "category": protein, - "value": "{:.2f}".format(value), - "subData": sub_data, - } - pie_data.append(protein_pdata) - - # ganttApp toy data - gantt_data = [ - { - "category": "Lipid 1", - "startFrame": 0, - "endFrame": 10, - }, - { - "category": "Lipid 1", - "startFrame": 45, - "endFrame": 75, - }, - { - "category": "Lipid 1", - "startFrame": 90, - "endFrame": 100, - }, - { - "category": "Lipid 2", - "startFrame": 10, - "endFrame": 35, - }, - { - "category": "Lipid 2", - "startFrame": 30, - "endFrame": 60, - }, - ] - top_10_lipids = ["Lipid 1", "Lipid 2"] - - # payload should include the entire data. 
The backend can process it then based on client requests - payload = { - "data": js, - "proteins": [protein], - "lipids": lipids, - "pie_data": pie_data, # TODO: include protein info - "gantt_data": gantt_data, - "top_10_lipids": top_10_lipids, - } - - return payload - - def __str__(self): - if self.contacts == None: - return "" - else: - return "".format(len(self.contacts)) - - def __repr__(self): - if self.contacts == None: - return "" - else: - return "".format(len(self.contacts)) diff --git a/prolint2/core/__init__.py b/prolint2/core/__init__.py new file mode 100644 index 0000000..849cb85 --- /dev/null +++ b/prolint2/core/__init__.py @@ -0,0 +1,3 @@ +from .universe import Universe +from .groups import ExtendedAtomGroup +from .contact_provider import ContactsProvider \ No newline at end of file diff --git a/prolint2/core/base.py b/prolint2/core/base.py new file mode 100644 index 0000000..cfd993f --- /dev/null +++ b/prolint2/core/base.py @@ -0,0 +1,37 @@ +import numpy as np +from MDAnalysis.core.topologyattrs import ResidueStringAttr + +class MacrosClass(ResidueStringAttr): + attrname = "macros" + singular = "macro" + + def __init__(self, universe): + n_atoms, n_residues, n_segments = universe.atoms.n_atoms, universe.residues.n_residues, universe.segments.n_segments + values = self._gen_initial_values(n_atoms, n_residues, n_segments) + super().__init__(values) + + @staticmethod + def _gen_initial_values(n_atoms, n_residues, n_segments): + return np.array(["other"] * n_residues, dtype=object) + + @staticmethod + def set_macros_values(query, n_proteins=None): + protein_segments = query.segments + + if len(protein_segments) == 1 and protein_segments.n_atoms == query.n_atoms: + for segment_idx, segment in enumerate(protein_segments): + segment.residues.macros = "Protein" + str(segment_idx) + else: + resseq = query.residues.resids + res0, first_last_index, first_index = resseq[0], [], 0 + + for last_index, res in enumerate(resseq): + if res < res0: + 
first_last_index.append((first_index, last_index - 1)) + first_index = last_index + res0 = res + first_last_index.append((first_index, last_index)) + + for idx, (first_index, last_index) in enumerate(first_last_index): + selected_residues = query.residues[first_index:last_index + 1] + selected_residues.macros = "Protein" + str(idx) diff --git a/prolint2/core/contact_provider.py b/prolint2/core/contact_provider.py new file mode 100644 index 0000000..52c0b67 --- /dev/null +++ b/prolint2/core/contact_provider.py @@ -0,0 +1,280 @@ +from collections import defaultdict +from typing import Callable, Literal + +import numpy as np +import pandas as pd + +from prolint2.computers.contacts import ContactComputerBase, SerialContacts +from prolint2.core.typing import NestedFloatDict, NestedIterFloatDict, NestedIterIntDict, LipidId + +from prolint2.metrics.base import BaseContactStore +from prolint2.metrics.exact_contacts import ExactContacts +from prolint2.metrics.aprox_contacts import AproxContacts + +from prolint2.config.units import DEFAULT_SIM_PARAMS + + +class ComputedContacts: + """A class to compute contacts between residues and lipids. + + Parameters + ---------- + contact_strategy_instance : BaseContactStore + An instance of a contact strategy class. + provider : ContactsProvider + The contact provider that will be used to compute contacts. + + """ + def __init__(self, contact_strategy_instance: BaseContactStore, provider: 'ContactsProvider'): + self._contact_strategy = contact_strategy_instance + self.provider = provider + + def compute_metric(self, metric: str, target_lipid_name=None) -> NestedFloatDict: + """Compute a pre-defined metric for all lipids or a specific lipid. + + Parameters + ---------- + metric : str + The metric to compute. Must be one of 'max', 'sum', 'mean'. + target_lipid_name : str, optional + The name of the lipid to compute the metric for. If None, the metric will be computed for all lipids. 
+ + Returns + ------- + Dict[str, Dict[str, Dict[int, float]]] + A dictionary of computed metrics for all lipids. + + Examples + -------- + >>> c.compute('max') + >>> c.compute('sum', 'DOPC') + >>> c.compute('median') # raises ValueError. Use `apply_function` instead. + """ + + return self._contact_strategy.compute(metric, target_lipid_name=target_lipid_name) + + def apply_function(self, func: Callable, target_lipid_name=None) -> NestedFloatDict: + """Apply the given function to the contacts for the given lipid name. """ + return self._contact_strategy.apply_function(func, target_lipid_name=target_lipid_name) + + @property + def contacts(self) -> NestedIterFloatDict: + """The computed contacts.""" + return self._contact_strategy.contacts + + @property + def pooled_contacts(self) -> NestedIterFloatDict: + """The computed contacts.""" + return self._contact_strategy.pooled_results() + + @property + def contact_frames(self) -> NestedIterIntDict: + """The computed contacts.""" + return self._contact_strategy.contact_frames + + def create_dataframe(self, n_frames: int) -> pd.DataFrame: + """Create a pandas DataFrame from the computed contacts. + + Parameters + ---------- + n_frames : int + The number of frames in the trajectory. + + Returns + ------- + pd.DataFrame + A pandas DataFrame with the computed contacts. 
+ """ + keys = [] + contact_arrays = [] + + for residue_id, lipid_name_dict in self.contact_frames.items(): + for lipid_id, frame_indices in lipid_name_dict.items(): + contact_array = np.zeros(n_frames, dtype=np.int8) + contact_array[frame_indices] = 1 + + keys.append((residue_id, lipid_id)) + contact_arrays.append(contact_array) + + df = pd.DataFrame(contact_arrays, index=pd.MultiIndex.from_tuples(keys, names=['ResidueID', 'LipidId'])) + df = df.sort_index(level=['ResidueID', 'LipidId'], ascending=[True, True]) + + return df + + def get_lipids_by_residue_id(self, residue_id: int) -> list: + """Get all LipidIds that interact with the given ResidueID.""" + return sorted(list(self.contact_frames[residue_id].keys())) + + def get_residues_by_lipid_id(self, lipid_id: int) -> list: + """Get all ResidueIDs that interact with the given LipidId.""" + residues = [residue_id for residue_id, lipid_name_dict in self.contact_frames.items() if lipid_id in lipid_name_dict.keys()] + return residues + + def get_contact_data(self, residue_id: int, lipid_id: int, output: str = 'contacts') -> list: + """Get the contact data for a given residue and lipid. + + Parameters + ---------- + residue_id : int + The residue id. + lipid_id : int + The lipid id. + output : str, optional + The output format. Must be one of 'contacts' or 'indices'. + + Returns + ------- + list + A list of contacts or frame indices. + """ + + frame_indices = self.contact_frames[residue_id][lipid_id] + + if output == 'indices': + return frame_indices + else: + n_frames = max([max(frame_indices_list) for frame_indices_list in self.contact_frames[residue_id].values()]) + 1 + contact_array = [1 if i in frame_indices else 0 for i in range(n_frames)] + return contact_array + + + def intersection(self, other: 'ComputedContacts') -> 'ComputedContacts': + """Compute the intersection of two contact providers. Note that ProLint contacts use a radial cutoff. 
+ This means that the intersection between two contact providers (c1 and c2) will be equal to the contact provider + with the smallest cutoff. ProLint, however, defines the intersection between two contact providers (c1 and c2) to + be equal to the lipid ids of the contact provider with the smallest cutoff, and the frame indices of the contact + provider with the largest cutoff. This way the intersection between two contact providers is meaningful and + computationaly allows for chaining of contact providers (See example below). + + Parameters + ---------- + other : ComputedContacts + The other contact provider to compute the intersection with. + + Returns + ------- + ContactsProvider + A new contact provider with the intersection of the contacts of both contact providers. + + Examples + -------- + >>> ts = Universe('coordinates.gro', 'trajectory.xtc') + >>> c1 = ts.compute_contacts(cutoff=7) + >>> c2 = ts.compute_contacts(cutoff=8) + >>> c3 = c1 + c2 + >>> c1 + c2 == c2 + c1 # True + """ + result_data = defaultdict(lambda: defaultdict(list)) + + for residue_id, lipid_ids in self.contact_frames.items(): + for lipid_id in lipid_ids: + if LipidId(lipid_id) in other.contact_frames[residue_id]: + result_data[residue_id][lipid_id] = other.contact_frames[residue_id][lipid_id] + + # Create a new instance of the contact strategy class + contact_instances = self._contact_strategy.__class__(self.provider.query.universe, result_data) + contact_instances.norm_factor = self.provider.params.get('norm_factor', 1) + contact_instances.run() + + return ComputedContacts(contact_instances, self.provider) + + def difference(self, other: 'ComputedContacts') -> 'ComputedContacts': + """Compute the difference of two contact providers. Given two contact providers (c1 and c2), the difference + between them (c2 -c1) is defined as the contacts of c2 that are not present in c1. + + Parameters + ---------- + other : ComputedContacts + The other contact provider to compute the difference with. 
+ + Returns + ------- + ContactsProvider + A new contact provider with the difference of the contacts of both contact providers. + + Examples + -------- + >>> ts = Universe('coordinates.gro', 'trajectory.xtc') + >>> c1 = ts.compute_contacts(cutoff=7) + >>> c2 = ts.compute_contacts(cutoff=8) + >>> c3 = c2 - c1 + >>> c1 - c2 == c2 - c1 # False, c1 - c2 will be an empty contact provider if c1 is a subset of c2 + """ + + result_data = defaultdict(lambda: defaultdict(list)) + + for residue_id, lipid_ids in self.contact_frames.items(): + for lipid_id in lipid_ids: + if LipidId(lipid_id) not in other.contact_frames[residue_id]: + result_data[residue_id][lipid_id] = self.contact_frames[residue_id][lipid_id] + + # Create a new instance of the contact strategy class + contact_instances = self._contact_strategy.__class__(self.provider.query.universe, result_data) + contact_instances.run() + + return ComputedContacts(contact_instances, self.provider) + + def __add__(self, other: 'ComputedContacts') -> 'ComputedContacts': + return self.intersection(other) + + def __sub__(self, other: 'ComputedContacts') -> 'ComputedContacts': + return self.difference(other) + + +class ContactsProvider: + """ + Class that provides the contacts computation functionality. + """ + def __init__(self, query, database, params=None, compute_strategy: Literal['default'] = 'default', contact_strategy: Literal['exact', 'aprox'] = 'exact'): + self.query = query + self.database = database + + self._contact_computers = { + 'default': SerialContacts + } + self._contact_counter = { + 'exact': ExactContacts, + 'aprox': AproxContacts + } + self._compute_strategy = compute_strategy + self._contact_strategy = self._contact_counter[contact_strategy] + + self.params = params if params is not None else DEFAULT_SIM_PARAMS + + def compute(self, strategy_or_computer=None, **kwargs): + """ + Compute contacts between the query and the database. 
+ + Parameters + ---------- + strategy_or_computer : str or ContactComputerBase, optional + The strategy to compute contacts. If None, the default strategy is used. + **kwargs + Additional arguments to pass to the contact computer. + + Returns + ------- + ComputedContacts + The computed contacts. + """ + if strategy_or_computer is None: + strategy_or_computer = self._compute_strategy + + # Strategy to compute contacts (e.g. serial, parallel, etc.) + if isinstance(strategy_or_computer, ContactComputerBase): + contact_computer = strategy_or_computer + else: + contact_computer_class = self._contact_computers.get(strategy_or_computer, None) + if contact_computer_class is None: + strats = ', '.join(self._contact_computers.keys()) + raise ValueError(f"Unknown strategy or computer: {strategy_or_computer}. Available strategies are: {strats}.") + contact_computer = contact_computer_class( + self.query.universe, self.query, self.database, **kwargs + ) + contact_computer.run(verbose=True) + + # Strategy to count and store contacts (e.g. exact, aprox, etc.) 
+ contact_strategy_instance = self._contact_strategy(self.query.universe, contact_computer.contact_frames, self.params.get('norm_factor')) + contact_strategy_instance.run() + + return ComputedContacts(contact_strategy_instance, self) diff --git a/prolint2/core/groups.py b/prolint2/core/groups.py new file mode 100644 index 0000000..cd582f4 --- /dev/null +++ b/prolint2/core/groups.py @@ -0,0 +1,168 @@ +from abc import ABC, abstractmethod +from typing import Iterable, Union, Dict +from collections import Counter + +import numpy as np +import MDAnalysis as mda + +class PLAtomGroupBase(ABC): + """An abstract base class for AtomGroup objects.""" + + @abstractmethod + def add(self, resname=None, atomname=None, resnum=None, atomids=None): + """ Add atoms to the query or database.""" + + @abstractmethod + def remove(self, resname=None, atomname=None, resnum=None, atomids=None): + """ Remove atoms from the query or database.""" + + @abstractmethod + def get_resname(self, resid: int): + """ Get the residue name of a residue in the AtomGroup.""" + + @abstractmethod + def get_resnames(self, resids: Iterable[int]): + """ Get the residue names of a list of residues in the AtomGroup.""" + + @abstractmethod + def get_resid(self, resname: str): + """ Get the residue ID of a residue in the AtomGroup.""" + + @abstractmethod + def get_resids(self, resnames: Iterable[str]): + """ Get the residue IDs of a list of residues in the AtomGroup.""" + + @abstractmethod + def filter_resids_by_resname(self, resids: np.ndarray, resname: str): + """ Filter the residue IDs by residue name.""" + + @property + @abstractmethod + def unique_resnames(self): + """ Get the unique residue names in the AtomGroup.""" + + @property + @abstractmethod + def resname_counts(self): + """ Get the number of residues of each residue name in the AtomGroup.""" + +class ExtendedAtomGroup(mda.AtomGroup, PLAtomGroupBase): + """An extended version of the MDAnalysis AtomGroup class.""" + + def __init__(self, *args, 
**kwargs): + """Initialize the AtomGroup.""" + super().__init__(*args, **kwargs) + self._resname_resid_labels = self._build_resname_resid_labels() + self._stored_resnames = self.residues.resnames + self._stored_resids = self.residues.resids + + def _build_resname_resid_labels(self): + """Build a dictionary of residue names and residue IDs.""" + resnames = self.residues.resnames + resids = self.residues.resids + + return dict(zip(resids, resnames)) + + def _build_stored_resnames(self): + """Build a dictionary of residue names and residue IDs.""" + resnames = self.residues.resnames + return resnames + + def _build_selection_string(self, resname=None, atomname=None, resnum=None, atomids=None): + selections = [] + + if resname is not None: + if isinstance(resname, str): + resname = [resname] + selections.append("resname " + " or resname ".join(resname)) + + if atomname is not None: + if isinstance(atomname, str): + atomname = [atomname] + selections.append("name " + " or name ".join(atomname)) + + if resnum is not None: + resnum = map(str, resnum) + selections.append("resid " + " or resid ".join(resnum)) + + if atomids is not None: + atomids = map(str, atomids) + selections.append("bynum " + " or bynum ".join(atomids)) + + if not selections: + raise ValueError("At least one selection criterion must be provided") + + return " or ".join(selections) + + def add(self, resname=None, atomname=None, resnum=None, atomids=None): + """Add atoms to the query or database.""" + selection_string = self._build_selection_string(resname, atomname, resnum, atomids) + new_group = self.universe.atoms.select_atoms(selection_string) + new_group = self | new_group + + return self.__class__(new_group) + + def remove(self, resname=None, atomname=None, resnum=None, atomids=None): + """Remove atoms from the query or database.""" + selection_string = self._build_selection_string(resname, atomname, resnum, atomids) + atoms_to_remove = self.select_atoms(selection_string) + new_group = self - 
atoms_to_remove + + return self.__class__(new_group) + + def get_resname(self, resid: int): + """Get the residue name of a residue in the AtomGroup.""" + return self._resname_resid_labels[resid] + + def get_resnames(self, resids: Iterable[int], out: Union[list, Dict[int, str]] = list): + """Get the residue names of a list of residues in the AtomGroup.""" + if out is list: + return [self._resname_resid_labels[resid] for resid in resids] + elif out is dict: + return {resid: self._resname_resid_labels[resid] for resid in resids} + else: + raise ValueError("out must be either list or dict") + + def get_resid(self, resname: str): + """Get the residue ID of a residue in the AtomGroup.""" + return self.residues.resids[self.residues.resnames == resname][0] + + def get_resids(self, resnames: Iterable[str], out: Union[list, Dict[str, int]] = list): + """Get the residue IDs of a list of residues in the AtomGroup.""" + if out is list: + return [self.get_resid(resname) for resname in resnames] + elif out is dict: + return {resname: self.get_resid(resname) for resname in resnames} + else: + raise ValueError("out must be either list or dict") + + def filter_resids_by_resname(self, resids: Iterable[int], resname: str): + """Filter the residue IDs by residue name.""" + resids = np.asarray(resids) + all_resnames = self._stored_resnames + all_resids = self._stored_resids + # print ('shapes', all_resnames.shape, all_resids.shape, resids.shape) + indices = np.searchsorted(all_resids, resids) + return resids[np.where(all_resnames[indices] == resname)[0]] + + @staticmethod + def static_filter_resids_by_resname(resids: np.ndarray, resnames: np.ndarray, resids_subset: np.ndarray, resname: str): + """Filter the residue IDs by residue name.""" + indices = np.searchsorted(resids, resids_subset) + return resids_subset[np.where(resnames[indices] == resname)[0]] + + @property + def unique_resnames(self): + """Get the unique residue names in the AtomGroup.""" + return 
class Universe(mda.Universe):
    """A subclass of MDAnalysis.Universe that adds a query and database attribute, and other useful methods.

    Parameters
    ----------
    *args, **kwargs
        Forwarded to ``MDAnalysis.Universe`` when ``universe`` is None.
    universe : MDAnalysis.Universe, optional
        An existing universe to build from.  Its ``filename`` and
        ``trajectory.filename`` are re-opened; ``*args``/``**kwargs`` are not
        used in this case.
    query, database : MDAnalysis.AtomGroup, optional
        Reference and target groups for contact calculation.  They default to
        the selections ``"protein"`` and ``"not protein"`` respectively.
    normalize_by : {'counts', 'actual_time', 'time_fraction'}
        How contact counts are scaled (see ``_handle_normalizer``).
    units : str
        Name of a ``UnitConversionFactor`` member used for time conversion.

    Raises
    ------
    TypeError
        If ``universe`` is given but is not an ``MDAnalysis.Universe``.
    ValueError
        If ``units`` or ``normalize_by`` is not a recognised value.
    """

    def __init__(
        self,
        *args,
        universe=None,
        query=None,
        database=None,
        normalize_by: Literal['counts', 'actual_time', 'time_fraction'] = 'time_fraction',
        units: TimeUnitLiteral = 'us',
        **kwargs,
    ):
        if universe is not None:
            if isinstance(universe, mda.Universe):
                # Re-open the same files rather than sharing reader state.
                # NOTE(review): presumably fails for universes that were not
                # created from files (no filename) - confirm with callers.
                topology = universe.filename
                trajectory = universe.trajectory.filename
                super().__init__(topology, trajectory)
            else:
                raise TypeError("universe argument should be an instance of mda.Universe")
        else:
            super().__init__(*args, **kwargs)

        self._query = self._handle_query(query)
        self._database = self._handle_database(database)

        self.params = {
            'units': units,
            'normalizer': normalize_by,
            'unit_conversion_factor': self._handle_units(units),
            'norm_factor': self._handle_normalizer(normalize_by, units)
        }

        self.registry = MetricRegistry()

        self._add_macros()

    def _add_macros(self):
        """Attach the ``MacrosClass`` topology attribute and fill it from the query."""
        macros_attr = MacrosClass(self)
        self.atoms.universe.add_TopologyAttr(macros_attr)
        macros_attr.set_macros_values(self.query)

    def _handle_query(self, query):
        """Return ``query`` (or the default ``"protein"`` selection) as an ExtendedAtomGroup."""
        if query is None:
            query_selection_string = "protein"
            query = self.select_atoms(query_selection_string)
        return ExtendedAtomGroup(query)

    def _handle_database(self, database):
        """Return ``database`` (or the default ``"not protein"`` selection) as an ExtendedAtomGroup."""
        if database is None:
            database_selection_string = "not protein"
            database = self.select_atoms(database_selection_string)
        return ExtendedAtomGroup(database)

    def _handle_units(self, units):
        """Return the factor converting trajectory time into ``units``.

        ``units`` may be the name of a ``UnitConversionFactor`` member or an
        object exposing ``.value``.  The result is the ratio between the
        trajectory's time-unit value and the requested unit's value.
        """
        if isinstance(units, str):
            if units in UnitConversionFactor.__members__:
                units = UnitConversionFactor[units]
            else:
                raise ValueError(f"units argument must be one of {UnitConversionFactor.__members__}")
        time_unit = self._set_default_time_unit()
        return UnitConversionFactor[time_unit].value / units.value

    def _handle_normalizer(self, normalize_by, units):
        """Return the per-frame scaling factor for the chosen normalizer.

        * ``'counts'``        -> ``1.0``
        * ``'actual_time'``   -> ``dt`` converted to ``units``
        * ``'time_fraction'`` -> ``dt / totaltime``

        Only the requested factor is computed.  Building all three eagerly
        made every normalizer fail on single-frame trajectories, where
        ``totaltime`` is zero and ``dt / totaltime`` is undefined.
        """
        if normalize_by not in ['counts', 'actual_time', 'time_fraction']:
            raise ValueError("normalize_by argument must be one of ['counts', 'actual_time', 'time_fraction']")

        if normalize_by == 'counts':
            return 1.0
        if normalize_by == 'actual_time':
            return float(self.trajectory.dt * self._handle_units(units))

        totaltime = self.trajectory.totaltime
        if not totaltime > 0:
            # A trajectory with no elapsed time (e.g. a single frame): that
            # one frame represents the whole trajectory, so its fraction is 1.
            return 1.0
        return float(self.trajectory.dt / totaltime)

    def _set_default_time_unit(self):
        """Return the trajectory's time unit, falling back to ``'ps'`` when unset."""
        traj_time_unit = self.trajectory.units.get('time', None)
        if traj_time_unit is None:
            warnings.warn("Trajectory time unit is not set. Assuming 'ps'.")

        return traj_time_unit if traj_time_unit is not None else 'ps'

    @property
    def query(self):
        """The query AtomGroup. This is the group of atoms that are used as reference during contact calculation."""
        return ExtendedAtomGroup(self._query)

    @query.setter
    def query(self, new_query):
        if not isinstance(new_query, mda.AtomGroup):
            raise TypeError("query attribute must be an instance of mda.AtomGroup")
        self._query = new_query

    def update_query(self, new_query):
        """Update the query AtomGroup with a new AtomGroup."""
        self.query = new_query

    @property
    def database(self):
        """The database AtomGroup. This is the group of atoms that are used as target during contact calculation."""
        return ExtendedAtomGroup(self._database)

    @database.setter
    def database(self, new_database):
        if not isinstance(new_database, mda.AtomGroup):
            raise TypeError("database attribute must be an instance of mda.AtomGroup")
        self._database = new_database

    def update_database(self, new_database):
        """Update the database AtomGroup with a new AtomGroup."""
        self.database = new_database

    def compute_contacts(self, *args, **kwargs):
        """Compute contacts between the query and database AtomGroups."""
        contacts_provider = ContactsProvider(self.query, self.database, params=self.params)
        return contacts_provider.compute(*args, **kwargs)

    @property
    def units(self):
        """The units of the trajectory time."""
        return self.params['units']

    @units.setter
    def units(self, new_units):
        # Compute both derived values before touching ``params`` so a
        # rejected unit leaves the stored parameters consistent.
        conversion_factor = self._handle_units(new_units)
        norm_factor = self._handle_normalizer(self.params['normalizer'], new_units)
        self.params['unit_conversion_factor'] = conversion_factor
        self.params['units'] = new_units
        self.params['norm_factor'] = norm_factor

    @property
    def normalize_by(self):
        """The normalizer of the trajectory time."""
        return self.params['normalizer']

    @normalize_by.setter
    def normalize_by(self, new_normalizer):
        self.params['norm_factor'] = self._handle_normalizer(new_normalizer, self.params['units'])
        self.params['normalizer'] = new_normalizer

    def __repr__(self) -> str:
        # The previous implementation returned an empty string, which made
        # instances invisible in logs and interactive sessions.
        return f"<ProLint Wrapper for {super().__repr__()}>"

    def __str__(self) -> str:
        return repr(self)
class AproxContacts(BaseContactStore):
    """Contact store that keeps the raw per-lipid contact entries.

    After ``run()``, ``self._contacts[residue][lipid_resname][lipid_id]``
    holds ``contact_frames[residue][lipid_id]`` unchanged.  Aggregations are
    derived from the *number* of stored entries per lipid id.
    """

    def run(self, lipid_resnames: Union[str, List] = None) -> None:
        """Collect the contact entries of every residue, grouped by lipid type.

        Parameters
        ----------
        lipid_resnames : str or list of str, optional
            Lipid residue name(s) to collect.  If None, all residue names
            found in the database are used.

        Returns
        -------
        None
            Results are stored in ``self._contacts``.
        """
        if lipid_resnames is None:
            lipid_resnames = self._database_unique_resnames
        elif isinstance(lipid_resnames, str):
            lipid_resnames = [lipid_resnames]

        for residue, contact_frame in self.contact_frames.items():
            # The candidate ids depend only on the residue, not on the lipid
            # type, so build the array once per residue.
            ids_to_filter = np.array(list(contact_frame.keys()))
            for lipid_resname in lipid_resnames:
                lipid_ids = fast_filter_resids_by_resname(self._resids, self._resnames, ids_to_filter, lipid_resname)
                for lipid_id in lipid_ids:
                    self._contacts[residue][lipid_resname][lipid_id] = contact_frame[lipid_id]

    def pooled_results(self, target_lipid_name: Union[str, None] = None) -> Dict[str, np.ndarray]:
        """Pool the per-lipid-id contact counts of each residue and lipid type.

        Parameters
        ----------
        target_lipid_name : str, optional
            Restrict the result to this lipid residue name.  If None, all
            lipid types are included.

        Returns
        -------
        defaultdict
            ``{residue: {lipid_name: [count * norm_factor, ...]}}`` with one
            value per lipid id, where ``count`` is the number of stored
            contact entries for that lipid id.
        """
        pooled_results = defaultdict(lambda: defaultdict(list))
        for residue, lipid_data in self._contacts.items():
            for lipid_name, lipid_contacts in lipid_data.items():
                if target_lipid_name is None or lipid_name == target_lipid_name:
                    pooled_results[residue][lipid_name].extend(
                        [len(entries) * self.norm_factor for entries in lipid_contacts.values()]
                    )
        return pooled_results

    def compute_metric(self, metric: str, target_lipid_name=None):
        """Compute a pre-defined metric for all lipids or a specific lipid.

        Parameters
        ----------
        metric : str
            Name of the numpy reduction to apply, e.g. 'max', 'sum', 'mean'.
        target_lipid_name : str, optional
            The name of the lipid to compute the metric for. If None, the metric will be computed for all lipids.

        Returns
        -------
        Dict[str, Dict[str, Dict[int, float]]]
            ``{residue: {lipid_name: {lipid_id: value}}}``.
        """

        def numpy_reduction(values):
            # Resolved lazily so the attribute lookup only happens when there
            # is at least one contact entry to reduce.
            return getattr(np, metric)(values)

        return self.apply_function(numpy_reduction, target_lipid_name)

    def apply_function(self, func: Callable, target_lipid_name=None):
        """Apply a function to all lipids or a specific lipid.

        ``func`` receives, for each lipid id, a list of ones with one element
        per stored contact entry (not the entries themselves).
        NOTE(review): with that input 'max' and 'mean' are always 1 and 'sum'
        is the unnormalised entry count - confirm this is intended.

        Parameters
        ----------
        func : Callable
            The function to apply; its result is converted with ``float``.
        target_lipid_name : str, optional
            The name of the lipid to apply the function to. If None, the function will be applied to all lipids.

        Returns
        -------
        Dict[str, Dict[str, Dict[int, float]]]
            ``{residue: {lipid_name: {lipid_id: value}}}``.  Every residue is
            present, with an empty mapping when no lipid type matched.

        Example
        -------
        >>> cd = AproxContacts(...)
        >>> cd.run()
        >>> cd.apply_function(np.mean)
        >>> cd.apply_function(np.max, target_lipid_name='DOPC')
        >>> cd.apply_function(lambda x: np.mean(x) / np.max(x), target_lipid_name='DOPC')
        """
        computed_results = {}
        for residue, lipid_data in self._contacts.items():
            computed_results[residue] = {}
            for lipid_name, lipid_contacts in lipid_data.items():
                if target_lipid_name is None or lipid_name == target_lipid_name:
                    computed_results[residue][lipid_name] = {
                        lipid_id: float(func(list(np.ones_like(contact_array))))
                        for lipid_id, contact_array in lipid_contacts.items()
                    }
        return computed_results
class Metric(ABC):
    """Apply one or more metric objects to pooled contacts and collect the results.

    Parameters
    ----------
    contacts
        Object exposing a ``contacts`` mapping
        ``{residue_id: {lipid_name: contact_array}}``; it is copied into a
        dict sorted by residue id.
    metrics
        A metric object or a list of them; each must provide
        ``compute_metric(contact_array)``.
    output_format : OutputFormat, optional
        Collector for the results.  When omitted, a new
        ``DefaultOutputFormat`` is created for this instance.
    lipid_type : str, optional
        If given, only this lipid name is processed.
    clear : bool
        Whether to call ``output_format.clear()`` before use.
    """

    def __init__(self, contacts, metrics, output_format: Union["OutputFormat", None] = None, lipid_type=None, clear=True):
        self.contact_input = dict(sorted(contacts.contacts.items()))

        if not isinstance(metrics, list):
            metrics = [metrics]
        self.metrics = metrics

        # A default written as ``DefaultOutputFormat()`` in the signature is
        # evaluated once and then shared by every instance; build it per
        # instance instead so results of separate Metric objects never mix.
        if output_format is None:
            output_format = DefaultOutputFormat()
        if clear:
            output_format.clear()
        self.output_format = output_format
        self.lipid_type = lipid_type

    def compute(self, dt=1, totaltime=1):
        """Compute every metric for every residue/lipid pair.

        Each metric value is multiplied by ``dt / totaltime`` and stored under
        the metric's class name.  Pairs without contacts store ``0``.

        Returns
        -------
        The value of ``output_format.get_result()``.
        """
        multiplier = dt / totaltime
        for residue_id, lipid_dict in self.contact_input.items():
            for lipid_name, contact_array in lipid_dict.items():
                if self.lipid_type is not None and self.lipid_type != lipid_name:
                    continue

                # ``len`` instead of truthiness: numpy arrays with more than
                # one element raise on ``bool(array)``.
                has_contacts = contact_array is not None and len(contact_array) > 0
                for metric in self.metrics:
                    if has_contacts:
                        value = metric.compute_metric(contact_array) * multiplier
                    else:
                        value = 0
                    self.output_format.store_result(residue_id, lipid_name, metric.__class__.__name__, value)

        return self.output_format.get_result()
""" + raise NotImplementedError("Subclasses should implement this method.") + + def compute(self, metric: str, target_lipid_name=None): + """Compute a pre-defined metric for all lipids or a specific lipid. + + Parameters + ---------- + metric : str + The metric to compute. Must be one of 'max', 'sum', 'mean'. + target_lipid_name : str, optional + The name of the lipid to compute the metric for. If None, the metric will be computed for all lipids. + + Returns + ------- + Dict[str, Dict[str, Dict[int, float]]] + A dictionary of computed metrics for all lipids. + + Examples + -------- + >>> cd = AproxContacts(...) + >>> cd.run() + >>> cd.compute('max') + >>> cd.compute('sum', 'DOPC') + >>> cd.compute('median') # raises ValueError. Use `apply_function` instead. + """ + + if metric in ['max', 'sum', 'mean']: + return self.compute_metric(metric, target_lipid_name) + else: + raise ValueError("Invalid metric specified. Use 'max', 'sum', 'mean'. For more complex metrics, use `apply_function`.") + + def compute_metric(self, metric: str, target_lipid_name=None): + """Compute the given metric for the given lipid name. """ + raise NotImplementedError("Subclasses should implement this method.") + + def apply_function(self, func: Callable, target_lipid_name=None): + """Apply the given function to the contacts for the given lipid name. """ + raise NotImplementedError("Subclasses should implement this method.") + + def pooled_results(self): + """Get the computed contacts all pooled together. """ + raise NotImplementedError("Subclasses should implement this method.") + + @property + def results(self): + """Get the computed contacts per lipid id. """ + if self._contacts is None: + raise ValueError('No contacts have been computed yet. Call run() first.') + return self._contacts + + @property + def contacts(self): + """Get the computed contacts all pooled together. """ + if self._contacts is None: + raise ValueError('No contacts have been computed yet. 
class FittingFunctionMeta(type):
    """Metaclass that keeps a name -> class registry of fitting functions.

    The first class created with this metaclass (the base) receives an empty
    ``registry``; every later class, which inherits that attribute, is stored
    in it under its ``name``.
    """

    def __init__(cls, name, bases, dct):
        if hasattr(cls, 'registry'):
            cls.registry[cls.name] = cls
        else:
            cls.registry = {}
        super().__init__(name, bases, dct)


class FittingFunction(metaclass=FittingFunctionMeta):
    """Base class for curve models fitted with ``scipy.optimize.curve_fit``.

    Subclasses set ``name`` (their registry key) and implement ``compute``
    and ``get_koff``; ``p0`` and ``maxfev`` are the defaults handed to
    ``curve_fit``.
    """

    name = None
    p0 = [1, 1, 1, 1]
    maxfev = 1000000

    def compute(self, x, *params):
        """Evaluate the model at ``x``; must be provided by subclasses."""
        raise NotImplementedError("Subclasses must implement this method")

    def get_koff(self, popt):
        """Derive koff from fitted parameters; must be provided by subclasses."""
        raise NotImplementedError("Subclasses must implement this method")

    def fit(self, x_data, y_data, **kwargs):
        """Fit ``compute`` to the data and return the optimal parameters.

        Extra keyword arguments go to ``curve_fit``; ``p0`` and ``maxfev``
        fall back to the class attributes when not supplied.
        """
        options = {'p0': self.p0, 'maxfev': self.maxfev, **kwargs}
        optimal_params, _covariance = curve_fit(self.compute, x_data, y_data, **options)
        return optimal_params
class ExactContacts(BaseContactStore):
    """Contact store that keeps per-lipid-id contact durations.

    After ``run()``, ``self._contacts[residue][lipid_resname]`` maps each
    lipid id to the value produced by ``fast_contiguous_segment_lengths`` for
    that lipid's contact frames (presumably the lengths of contiguous frame
    runs scaled by ``norm_factor`` - confirm in ``prolint2.metrics.utils``).
    """

    def run(self, lipid_resnames: Union[str, List] = None) -> None:
        """Compute the duration of lipid contacts for the given lipid types.

        Parameters
        ----------
        lipid_resnames : str or list of str, optional
            Lipid residue name(s) to compute durations for. If None, durations will be computed for all lipid types.

        Returns
        -------
        None
            The output is stored in the `self._contacts` attribute; lipid
            types without any contact for a residue are not stored.
        """
        if lipid_resnames is None:
            lipid_resnames = self._database_unique_resnames
        elif isinstance(lipid_resnames, str):
            lipid_resnames = [lipid_resnames]

        for residue, contact_frame in self.contact_frames.items():
            for lipid_resname in lipid_resnames:
                result = self.compute_lipid_durations(contact_frame, lipid_resname)
                if len(result) > 0:
                    self._contacts[residue][lipid_resname] = result

    def pooled_results(self, target_lipid_name=None):
        """Pool results for all lipids.

        Parameters
        ----------
        target_lipid_name : str, optional
            The name of the lipid to compute pooled results for. If None, pooled results will be computed for all lipids.

        Returns
        -------
        Dict[str, Dict[str, List[float]]]
            ``{residue: {lipid_name: [duration, ...]}}`` with the durations of
            all lipid ids of a type concatenated.
        """
        pooled_results = defaultdict(lambda: defaultdict(list))
        for residue, lipid_data in self._contacts.items():
            for lipid_name, lipid_contacts in lipid_data.items():
                if target_lipid_name is None or lipid_name == target_lipid_name:
                    pooled_contact_array = []
                    for lipid_id_contacts in lipid_contacts.values():
                        pooled_contact_array.extend(lipid_id_contacts)
                    pooled_results[residue][lipid_name].extend(pooled_contact_array)
        return pooled_results

    def compute_metric(self, metric: str, target_lipid_name=None):
        """Compute a pre-defined metric for all lipids or a specific lipid.

        Parameters
        ----------
        metric : str
            Name of the numpy reduction to apply, e.g. 'max', 'sum', 'mean'.
        target_lipid_name : str, optional
            The name of the lipid to compute the metric for. If None, the metric will be computed for all lipids.

        Returns
        -------
        Dict[str, Dict[str, Dict[int, float]]]
            A nested defaultdict ``{residue: {lipid_name: {lipid_id: value}}}``
            containing only residues with at least one matching lipid type.
        """
        computed_results = defaultdict(lambda: defaultdict(dict))
        for residue, lipid_data in self._contacts.items():
            for lipid_name, lipid_contacts in lipid_data.items():
                if target_lipid_name is None or lipid_name == target_lipid_name:
                    computed_contacts_per_id = {lipid_id: getattr(np, metric)(contact_array) for lipid_id, contact_array in lipid_contacts.items()}
                    computed_results[residue][lipid_name] = computed_contacts_per_id
        return computed_results

    def apply_function(self, func: Callable, target_lipid_name=None):
        """Apply a function to all lipids or a specific lipid.

        Parameters
        ----------
        func : Callable
            The function to apply to the lipid contact durations.
        target_lipid_name : str, optional
            The name of the lipid to apply the function to. If None, the function will be applied to all lipids.

        Returns
        -------
        Dict[str, Dict[str, Dict[int, float]]]
            ``{residue: {lipid_name: {lipid_id: value}}}``.  Every residue is
            present, with an empty mapping when no lipid type matched.

        Example
        -------
        >>> cd = ExactContacts(...)
        >>> cd.run()
        >>> cd.apply_function(np.mean)
        >>> cd.apply_function(np.max, target_lipid_name='DOPC')
        >>> cd.apply_function(lambda x: np.mean(x) / np.max(x), target_lipid_name='DOPC')
        """
        computed_results = {}
        for residue, lipid_data in self._contacts.items():
            computed_results[residue] = {}
            for lipid_name, lipid_contacts in lipid_data.items():
                if target_lipid_name is None or lipid_name == target_lipid_name:
                    computed_contacts_per_id = {lipid_id: func(contact_array) for lipid_id, contact_array in lipid_contacts.items()}
                    computed_results[residue][lipid_name] = computed_contacts_per_id
        return computed_results

    def compute_lipid_durations(self, contact_frame: Dict[int, List[int]], lipid_resname: str) -> dict:
        """Compute the duration of lipid contacts for one residue and lipid type.

        Parameters
        ----------
        contact_frame : Dict[int, List[int]]
            Contact frames of a single residue, keyed by lipid id.
        lipid_resname : str
            The residue name of the lipid to compute durations for.

        Returns
        -------
        dict
            ``{lipid_id: durations}`` for the lipid ids in ``contact_frame``
            whose residue name is ``lipid_resname``, in ``contact_frame``
            order and with its original key objects.
        """
        ids_to_filter = np.array(list(contact_frame.keys()))
        lipid_ids = fast_filter_resids_by_resname(self._resids, self._resnames, ids_to_filter, lipid_resname)

        # Membership against a numpy array is a linear scan per key; a set of
        # plain Python values gives constant-time look-ups with the same
        # equality semantics.
        wanted_ids = set(np.asarray(lipid_ids).tolist())

        return {
            lipid_id: fast_contiguous_segment_lengths(frames, self.norm_factor)
            for lipid_id, frames in contact_frame.items()
            if lipid_id in wanted_ids
        }
class FittingFunctionFactory:
    """Create fitting-function instances from their registered names."""

    @staticmethod
    def get_fitting_function(name):
        """Return a new instance of the fitting function registered as ``name``.

        Raises
        ------
        ValueError
            If no fitting function is registered under ``name``.
        """
        # Only the registry look-up is guarded: a KeyError raised inside a
        # fitting function's constructor is a different failure and must not
        # be reported as an unknown name.
        try:
            fitting_function_class = FittingFunction.registry[name]
        except KeyError:
            raise ValueError(f"Invalid fitting function name: {name}") from None
        return fitting_function_class()
self.results + + def clear(self): + self.results = defaultdict(dict) + +class ProLintDashboardOutputFormat(OutputFormat): + def __init__(self, residue_names=None, residue_ids=None): + super().__init__() + self.results = defaultdict(list) + self.residue_names = residue_names + self.residue_ids = residue_ids + + def store_result(self, residue_id, lipid_name, metric_name, value): + if not value > 0: + return + + self.results[lipid_name].append( + { + "residue": f"{self.residue_names[residue_id]} {self.residue_ids[residue_id]}", + "value": float(f"{value:.2f}"), + } + ) + # print ('store_result', f"{self.residue_names[residue_id]} {self.residue_ids[residue_id]}", float(f"{value:.2f}")) + + def get_result(self): + return self.results + + def clear(self): + self.results = defaultdict(list) diff --git a/prolint2/metrics/metrics.py b/prolint2/metrics/metrics.py new file mode 100644 index 0000000..b14b570 --- /dev/null +++ b/prolint2/metrics/metrics.py @@ -0,0 +1,69 @@ +from typing import Callable, Iterable +import numpy as np + +from prolint2.metrics.base import BaseMetric, Metric +from prolint2.metrics.registries import MetricRegistry +from prolint2.metrics.formatters import DefaultOutputFormat, SingleOutputFormat, ProLintDashboardOutputFormat, CustomOutputFormat + + +class UserDefinedMetric(BaseMetric): + name: str = 'custom' + def __init__(self, custom_function: Callable[[Iterable[int]], float]): + super().__init__() + self.custom_function = custom_function + + def compute_metric(self, contact_array): + return self.custom_function(contact_array) + +class MeanMetric(BaseMetric): + name: str = 'mean' + def compute_metric(self, contact_array): + return np.mean(contact_array) + +class SumMetric(BaseMetric): + name: str = 'sum' + def compute_metric(self, contact_array): + return np.sum(contact_array) + +class MaxMetric(BaseMetric): + name: str = 'max' + def compute_metric(self, contact_array): + return np.max(contact_array) + +def create_metric(contacts, metrics, 
class MetricRegistry:
    """Lookup table from metric name to metric class.

    On construction the registry imports ``prolint2.metrics.metrics`` and
    registers every class found there that subclasses ``BaseMetric``
    (``BaseMetric`` itself excluded) under that class's ``name`` attribute.
    """

    def __init__(self):
        self._metrics = {}
        self.module_name = 'prolint2.metrics.metrics'

        module = importlib.import_module(self.module_name)
        for _, obj in inspect.getmembers(module, inspect.isclass):
            if issubclass(obj, BaseMetric) and obj is not BaseMetric:
                self.register(obj.name, obj)

    def register(self, name: str, metric_class: "Type[BaseMetric]"):
        """Register ``metric_class`` under ``name``.

        An existing entry with the same name is replaced; a warning is
        logged in that case.
        """
        if name in self._metrics:
            # The message must be a format string: logging interpolates
            # `name` into it lazily when the record is emitted.
            logging.warning("Metric with name '%s' already exists in registry.", name)
        self._metrics[name] = metric_class

    def get_metric(self, name: str) -> "Type[BaseMetric]":
        """Return the class registered under ``name``.

        Raises
        ------
        ValueError
            If no metric was registered under ``name``.
        """
        if name not in self._metrics:
            raise ValueError(f"No metric found with the name '{name}'.")
        return self._metrics[name]

    def get_registered_names(self) -> "List[str]":
        """Return the registered metric names in registration order."""
        return list(self._metrics.keys())
def fast_filter_resids_by_resname(resids: np.ndarray, resnames: np.ndarray, resids_subset: np.ndarray, resname: str):
    """Return the residue IDs in ``resids_subset`` whose residue name is ``resname``.

    Parameters
    ----------
    resids : np.ndarray
        All residue IDs. Must be sorted in ascending order, because the
        lookup uses ``np.searchsorted``.
    resnames : np.ndarray
        Residue names, aligned element-wise with ``resids``.
    resids_subset : np.ndarray
        The residue IDs to filter.
    resname : str
        The residue name to keep.

    Returns
    -------
    set
        The IDs from ``resids_subset`` that are present in ``resids`` and
        carry the name ``resname``. IDs absent from ``resids`` are dropped.
    """
    resids = np.asarray(resids)
    resnames = np.asarray(resnames)
    subset = np.asarray(resids_subset)
    if subset.size == 0 or resids.size == 0:
        return set()

    # searchsorted returns an insertion point, not a match: it yields
    # len(resids) for IDs above the largest resid and a neighbour's
    # position for absent IDs. Clamp it, then keep only exact matches.
    indices = np.minimum(np.searchsorted(resids, subset), resids.size - 1)
    keep = (resids[indices] == subset) & (resnames[indices] == resname)
    return set(subset[keep])
def contact_frames_to_binary_array(contact_frames: Iterable[int], n_frames: int) -> np.ndarray:
    """Convert a collection of contact frames to a binary array.

    Parameters
    ----------
    contact_frames : Iterable[int]
        Frame indices at which a contact occurs. Any iterable of integers
        is accepted (list, tuple, set, generator, NumPy array).
    n_frames : int
        The number of frames in the trajectory.

    Returns
    -------
    np.ndarray
        A float array of length ``n_frames`` with ones at the indices
        corresponding to the contact frames and zeros elsewhere.
    """
    # Materialise the indices as a 1-D integer array first. Indexing with
    # the raw iterable would treat a tuple as a multi-dimensional index
    # and would fail for sets and generators.
    frame_indices = np.fromiter(contact_frames, dtype=np.intp)

    binary_array = np.zeros(n_frames)
    binary_array[frame_indices] = 1

    return binary_array
def fast_contiguous_segment_lengths(arr, multiplier: float=1.) -> np.ndarray:
    """Compute the lengths of contiguous segments of indices in the input array.

    A new segment starts wherever two consecutive entries differ by more
    than 1.

    Parameters
    ----------
    arr : Iterable[int]
        A sorted list of indices.
    multiplier : float, optional
        Factor applied to every segment length. Defaults to 1.

    Returns
    -------
    np.ndarray
        An array of contiguous segment lengths, each scaled by
        ``multiplier``. Empty when ``arr`` is empty.
    """
    n_entries = len(arr)
    if n_entries == 0:
        return np.array([])

    # Position of the last element of every segment except the final one.
    split_indices = np.where(np.diff(arr) > 1)[0]

    # Add a virtual end before the first element (-1) and the real end of
    # the last segment (n_entries - 1). Consecutive differences of these
    # end positions are the segment lengths, with no special cases.
    segment_ends = np.concatenate(([-1], split_indices, [n_entries - 1]))
    segment_lengths = np.diff(segment_ends)

    return segment_lengths * multiplier
+ + Parameters + ---------- + contact_frames : Iterable[int] + A list of contact frames. + n_frames : int + The number of frames in the trajectory. + lipid_resname : str + The residue name of the lipid to compute durations for. + + Returns + ------- + np.ndarray + An array of lipid contact durations. + """ + + ids_to_filter = np.array(list(contact_frames.keys())) + lipid_ids = filter_lipid_ids_by_resname(database, ids_to_filter, lipid_resname) + + durations = [contact_frames_to_binary_array(v, n_frames) for k, v in contact_frames.items() if k in lipid_ids] + durations = [count_contiguous_segments(v) * multiplier for v in durations] + + return sorted(chain.from_iterable(durations)) diff --git a/prolint2/prolint2.py b/prolint2/prolint2.py deleted file mode 100644 index a69b69a..0000000 --- a/prolint2/prolint2.py +++ /dev/null @@ -1,340 +0,0 @@ -r"""PL2 base classes --- :mod:`prolint2.PL2` -====================================================== -:Authors: Daniel P. Ramirez & Besian I. Sejdiu -:Year: 2022 -:Copyright: MIT License -""" - -import os -import numpy as np -import MDAnalysis as mda -from MDAnalysis.core.topologyattrs import ResidueStringAttr -from .contacts import Contacts -import configparser - -# Getting the config file -config = configparser.ConfigParser(allow_no_value=True) -config.read(os.path.join(os.path.abspath(os.path.dirname(__file__)), "config.ini")) -parameters_config = config["Parameters"] - - -class MacrosClass(ResidueStringAttr): - """ - Class to add the *macros* metadata. - - The *macros* metadata is an additional label to each residue in the system, - that is going to be useful for the selection of the query and the database groups. 
- - If the residue is included in the :class:`MembraneDatabase` group, then the *macro* - metadata will be set as **membrane**; if the residue is included in the - :class:`QueryProteins` group then the *macro* metadata will be set as - **Protein#** depending on the number of segments (or chains) in the system; - otherwise the *macro* metadata will be set as **other**. - - .. warning:: - - The identification of the different proteins in the system will be done using one of two - approaches: - - i. If the format file used includes segment (or chain) information, then the *macro* metadata will - be set with the name specified in each segment (or chain). #TODO - - ii. If the format files used do not include this information (i.e. *gro* format file) then :class:`PL2` - will assume that proteins are ordered and the start residue of the next protein is always smaller than - the last residue of the previous protein. - - Example - ------- - All these assignation are done automatically by **prolint2**, so you do not need to use this - class for anything. But you can access the information of the *macros* metadata as follows:: - - from prolint2 import PL2 - target_system = PL2('coordinates.gro', 'trajectory.xtc') - - target_system.query.selected.residues.macros - - And you will get an uni-dimensional numpy array with same amount of values as residues selected in the **query** - group and the *macro* of each residue. You can do the same for your **database** group. - - """ - - attrname = "macros" - singular = "macro" - - @staticmethod - def _gen_initial_values(n_atoms, n_residues, n_segments): - return np.array(["other"] * n_residues, dtype=object) - - -class PL2(object): - """Base class for managing the distance-based contacts calculation routines done by the **PL2** - package. It reads a structure/topology file and a trajectory file in any of the MDAnalysis-supported - formats. - - Parameters - ---------- - structure: any MDAnalysis-supported structure/topology file. 
- - trajectory : any MDAnalysis-supported trajectory file. - - add_lipid_types : list - list of strings with the residue name of lipids not included in the **prolint2** list of supported residues. - - Attributes - ---------- - atoms : AtomGroup - MDAnalysis AtomGroup with all the atoms in the system. - residues : ResidueGroup - MDAnalysis ResidueGroup with all the residues in the system. - list_of_macros : list - All the availables macros to use during the selection of the query/database groups. - query : :class:`QueryProteins` - **Query** group to use during the calculation of the contacts. - database : :class:`MembraneDatabase` - **Database** group to use during the calculation of the contacts. - contacts : :class:`.contacts.Contacts` - Contacts object to run and analyze the distance-based contacts results. - """ - - def __init__(self, structure, trajectory, add_lipid_types=[]): - # TODO: - # @bis: use a variable for this query: self.atoms.select_atoms(protein_sel) - # We need to also store useful system information (see below) - # TODO: maybe keep a reference to the universe object (=> ufcc.u)? 
- - # wrapping some basic MDAnalysis groups - md = mda.Universe(structure, trajectory) - self.atoms = md.atoms - self.residues = self.atoms.residues - self.atoms.universe.add_TopologyAttr("macros") - - # adding the macros to the membrane residues - lipid_types = parameters_config["lipid_types"].split(", ") - lipid_types = lipid_types + add_lipid_types - not_protein_restypes = np.unique( - self.atoms.select_atoms("not protein").residues.resnames - ) - membrane_restypes = [] - for type in lipid_types: - if type in not_protein_restypes: - membrane_restypes.append("resname " + type) - if len(membrane_restypes) == 1: - membrane_sel = membrane_restypes[0] - elif len(membrane_restypes) > 1: - membrane_sel = membrane_restypes[0] - for type in membrane_restypes[1:]: - membrane_sel = membrane_sel + " or " + type - else: - print("There are not lipid residues in your system") - - # adding the macros to the protein residues - protein_sel = "protein" - # First possibility: we can access segment(chain) information from the Universe. - if ( - len(self.atoms.select_atoms(protein_sel).segments) > 1 - and self.atoms.select_atoms(protein_sel).segments.n_atoms - == self.atoms.select_atoms(protein_sel).n_atoms - ): - for segment_idx in range( - len(self.atoms.select_atoms(protein_sel).segments) - ): - self.atoms.select_atoms(protein_sel).segments[ - segment_idx - ].residues.macros = "Protein" + str(segment_idx) - # Second possibility: the assumption here is that proteins are ordered and the start residue of the next - # protein is always smaller than the last residue of the previous protein. - else: - # Get start and end indices of proteins in the system. 
- resseq = self.atoms.select_atoms(protein_sel).residues.resindices - p0 = resseq[0] - # first and last index - fi_li = [] - fi = 0 - for li, p in enumerate(resseq): - if p < p0: - fi_li.append((fi, li - 1)) - fi = li - p0 = p - fi_li.append((fi, li)) - - for idx, values in enumerate(fi_li): - fi = values[0] - li = values[1] - self.atoms.select_atoms(protein_sel).residues[ - list(range(fi, li + 1)) - ].residues.macros = "Protein" + str(idx) - - # TODO - # Add merge chains and options to change the name of the proteins. - - self.atoms.select_atoms(membrane_sel).residues.macros = "membrane" - self.list_of_macros = list(np.unique(self.atoms.residues.macros)) - self.query = QueryProteins(self.atoms.select_atoms(protein_sel)) - self.database = MembraneDatabase(self.atoms.select_atoms(membrane_sel)) - self.contacts = Contacts(self.query, self.database) - - # system information - self.query_unique = np.unique(self.query.selected.resnames) - self.query_unique_size = self.query_unique.size - self.database_unique = np.unique(self.database.selected.resnames) - self.database_unique_size = self.database_unique.size - self.n_frames = md.trajectory.n_frames - self.totaltime = md.trajectory.totaltime - self.time = md.trajectory.time - self.units = md.trajectory.units - self.dt = md.trajectory.dt - - def __str__(self): - return "Base class to handle the calculation of the contacts in prolint2." - - def __repr__(self): - return "Base class to handle the calculation of the contacts in prolint2." - - -class BasicGroup(object): - """ - Basic class to be heritaged for the :class:`MembraneDatabase` and :class:`QueryProteins` - classes in order to handle the **database** and **query** groups respectively. - - Attributes - ---------- - selected : AtomGroup - An MDAnalysis AtomGroup object that includes the atoms - that will be used as database/query for the calculation of the contacts. 
- whole : AtomGroup - An MDAnalysis AtomGroup object including all the atoms from where the selections - can be done to define the database/query atoms for the calculation of the contacts. - The *selected* attribute will be always a subset of the *whole*. - """ - - def __init__(self, whole): - self.selected = whole - self.whole = whole - - def select(self, selection="all"): - """ - Cast an MDAnalysis.Atom, MDAnalysis.Residue, MDAnalysis.ResidueGroup, or str syntax - from the **whole** AtomGroup to the **selected** AtomGroup. - - Parameters - ---------- - selection: MDAnalysis.Atom, MDAnalysis.Residue, MDAnalysis.ResidueGroup or str - atoms to cast - """ - assert isinstance( - selection, - ( - str, - np.ndarray, - mda.core.groups.Residue, - mda.core.groups.ResidueGroup, - mda.core.groups.Atom, - mda.core.groups.AtomGroup, - ), - ), "the selection must be one of the preceding types" - if isinstance( - selection, (mda.core.groups.Residue, mda.core.groups.ResidueGroup) - ): - selection = selection.atoms - elif isinstance(selection, mda.core.groups.Atom): - selection = self.whole.select_atoms(f"index {selection.index}") - elif isinstance(selection, np.ndarray): - selection = self.whole.atoms[selection] - elif isinstance(selection, str): - selection = self.whole.atoms.select_atoms(selection) - self.selected = selection - - -class MembraneDatabase(BasicGroup): - """ - Class to handle the membrane **database** group. - - It heritages all atributes and methods from the - :class:`BasicGroup` class, and includes some new ones that - are specific for the membrane **database** group. - """ - - def __init__(self, whole): - super().__init__(whole) - - def lipid_types(self): - """Get the names of all the lipids that will be analyzed. 
- - Returns - ------- - array of lipid names - """ - if not isinstance(self.selected, mda.core.groups.AtomGroup): - return np.array([]) - else: - return np.unique(self.selected.residues.resnames) - - def lipid_count(self): - """Get the name and count of each lipid that will be analyzed. - - Returns - ------- - dictionary - key:value corresponds to lipid_name:count. - """ - lc = {} - lipids = self.lipid_types() - for lipid in lipids: - lc[lipid] = len( - self.selected.residues[self.selected.residues.resnames == lipid] - ) - return lc - - def __str__(self): - if not isinstance(self.selected, mda.core.groups.AtomGroup): - return "" - else: - return "".format( - self.selected.atoms.n_atoms - ) - - def __repr__(self): - if not isinstance(self.selected, mda.core.groups.AtomGroup): - return "" - else: - return "".format( - self.selected.atoms.n_atoms - ) - - -class QueryProteins(BasicGroup): - """ - Class to handle the **query** proteins group. - - It heritages all atributes and methods from the - :class:`BasicGroup` class, and includes some new ones that - are specific for the **query** proteins group. - """ - - def __init__(self, whole): - super().__init__(whole) - - def list_proteins(self): - """Get the labels of all the proteins that will be analyzed. 
- - Returns - ------- - array of protein labels - """ - return np.unique(self.whole.residues.macros) - - def __str__(self): - if not isinstance(self.selected, mda.core.groups.AtomGroup): - return "" - else: - return "".format( - self.selected.atoms.n_atoms - ) - - def __repr__(self): - if not isinstance(self.selected, mda.core.groups.AtomGroup): - return "" - else: - return "".format( - self.selected.atoms.n_atoms - ) diff --git a/prolint2/server/__init__.py b/prolint2/server/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/prolint2/server/app.tpl b/prolint2/server/app.tpl deleted file mode 100644 index 3750462..0000000 --- a/prolint2/server/app.tpl +++ /dev/null @@ -1,12 +0,0 @@ -%#template for the form for a new task -

WebApps 1

- - - - - - - - -
def per_lipid_contacts(ts, lipids, frame_cutoff=10):
    """Count, for each requested lipid, the frames it spends in contact with each residue.

    Parameters
    ----------
    ts : object
        Object exposing ``ts.contacts.contact_frames``, a mapping of the
        form ``{residue_id: {lipid_id: frames}}``.
    lipids : iterable
        The lipid IDs to report on.
    frame_cutoff : int, optional
        Residue-lipid pairs with fewer than this many contact frames are
        left out. Defaults to 10.

    Returns
    -------
    dict
        ``{lipid_id: {residue_id: n_contact_frames}}`` with one entry per
        requested lipid. A lipid with no contact above the cutoff maps to
        an empty dict.

    TODO:
        `frame_cutoff` should operate on a percentage of trajectory length.
    """
    results = {lipid_id: {} for lipid_id in lipids}
    for residue_id, lipid_dict in ts.contacts.contact_frames.items():
        for lipid_id, frames in lipid_dict.items():
            n_frames = len(frames)
            # `results` has exactly the requested lipids as keys, so this
            # is a constant-time membership test instead of a list scan.
            if n_frames >= frame_cutoff and lipid_id in results:
                results[lipid_id][residue_id] = n_frames
    return results
for x in list(zip(resnums, resnames))} chord_elements = [] @@ -196,7 +209,7 @@ def get_chord_elements(ts, nodes, ordered_combinations, cutoff=500): return chord_elements -def contact_chord(ts, top_lipid_ids, lipid_contact_frames, cutoff=100): +def contact_chord(ts, contacts, top_lipid_ids, lipid_contact_frames, cutoff=100): """ We call all functions here. We return the chord elements (these are the data amCharts needs to render nodes and links), we also return information on which @@ -207,7 +220,7 @@ def contact_chord(ts, top_lipid_ids, lipid_contact_frames, cutoff=100): """ # lipid_contacts = per_lipid_contacts(ts, top_lipid_ids) lipid_shared_contacts, ordered_combinations = shared_contacts( - ts, + contacts, top_lipid_ids, lipid_contact_frames, residues_to_show=30, @@ -215,7 +228,7 @@ def contact_chord(ts, top_lipid_ids, lipid_contact_frames, cutoff=100): ) # ordered_combinations = get_ordered_combinations(lipid_contacts) linked_nodes = get_linked_nodes(ordered_combinations, cutoff=cutoff) - nodes, hidden_node_indices = get_node_list(ts.query.selected.n_residues, linked_nodes) + nodes, hidden_node_indices = get_node_list(ts.query.n_residues, linked_nodes) chord_elements = get_chord_elements(ts, nodes, ordered_combinations, cutoff=cutoff) per_lipid_nodes = {} @@ -225,4 +238,4 @@ def contact_chord(ts, top_lipid_ids, lipid_contact_frames, cutoff=100): if all_lipid_nodes: per_lipid_nodes[lipid] = all_lipid_nodes - return chord_elements, hidden_node_indices, per_lipid_nodes \ No newline at end of file + return chord_elements, hidden_node_indices, per_lipid_nodes diff --git a/prolint2/server/get_json.py b/prolint2/server/get_json.py deleted file mode 100644 index 46c753f..0000000 --- a/prolint2/server/get_json.py +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env python - -import csv -import json - -# First data setup: -# For 1 protein systems -# protein_data = {Lipid: [ -# Residue: [FrameValues] -# ]} - -# TODO: -# For multiple protein systems: -# {Protein: protein_data} 
- -# TODO: -# Generalize to any system composition -# with system-agnostic terminology: -# {Reference: {InteractionObject: ReferenceUnit: [FrameValue]}} - -csv_in = open("out_girk.csv") - -js = {} -for row in csv.DictReader(csv_in): - lipid = row["Lipids"] - protein = row["Protein"] - # if lipid != "CHOL": continue - residue_id = row["ResName"] + " " + row["ResID"] - lipid_number_value = float(row["Lipid_Number"]) - value = [residue_id, float("{:.2f}".format(lipid_number_value))] - - if js.get(protein): - if js.get(protein).get(lipid): - js[protein][lipid].append(value) - else: - js[protein][lipid] = [value] - else: - js[protein] = {lipid: [value]} - -with open("girk.json", "w") as fp: - json.dump(js, fp) diff --git a/prolint2/server/girk.json b/prolint2/server/girk.json deleted file mode 100644 index 820a6ed..0000000 --- a/prolint2/server/girk.json +++ /dev/null @@ -1 +0,0 @@ -{"LocalGirk": {"CHOL": [["ARG 0", 0.0], ["GLN 1", 0.0], ["ARG 2", 0.0], ["TYR 3", 0.0], ["MET 4", 0.0], ["GLU 5", 0.0], ["LYS 6", 0.0], ["THR 7", 0.0], ["GLY 8", 0.0], ["LYS 9", 0.0], ["CYS 10", 0.0], ["ASN 11", 0.0], ["VAL 12", 0.0], ["HIS 13", 0.0], ["HIS 14", 0.0], ["GLY 15", 0.0], ["ASN 16", 0.09], ["VAL 17", 0.0], ["GLN 18", 0.01], ["GLU 19", 0.0], ["THR 20", 0.14], ["TYR 21", 1.75], ["ARG 22", 0.95], ["TYR 23", 1.48], ["LEU 24", 1.41], ["SER 25", 0.84], ["ASP 26", 0.2], ["LEU 27", 0.49], ["PHE 28", 0.06], ["THR 29", 0.0], ["THR 30", 0.01], ["LEU 31", 0.33], ["VAL 32", 0.0], ["ASP 33", 0.0], ["LEU 34", 0.18], ["LYS 35", 0.65], ["TRP 36", 0.41], ["ARG 37", 2.35], ["PHE 38", 2.23], ["ASN 39", 0.13], ["LEU 40", 1.48], ["LEU 41", 3.33], ["VAL 42", 1.59], ["PHE 43", 0.91], ["THR 44", 2.45], ["MET 45", 3.84], ["VAL 46", 1.92], ["TYR 47", 0.6], ["THR 48", 3.45], ["ILE 49", 2.1], ["THR 50", 1.15], ["TRP 51", 0.01], ["LEU 52", 1.02], ["PHE 53", 4.37], ["PHE 54", 1.09], ["GLY 55", 0.0], ["PHE 56", 2.25], ["ILE 57", 3.26], ["TRP 58", 0.03], ["TRP 59", 0.17], ["LEU 60", 1.8], ["ILE 61", 2.13], 
["ALA 62", 0.38], ["TYR 63", 0.71], ["VAL 64", 2.92], ["ARG 65", 2.87], ["GLY 66", 0.43], ["ASP 67", 0.06], ["LEU 68", 0.0], ["ASP 69", 0.1], ["HIS 70", 0.0], ["VAL 71", 0.0], ["GLY 72", 0.0], ["ASP 73", 0.0], ["GLN 74", 0.0], ["GLU 75", 0.0], ["TRP 76", 0.0], ["ILE 77", 0.0], ["PRO 78", 0.0], ["CYS 79", 1.42], ["VAL 80", 0.0], ["GLU 81", 0.0], ["ASN 82", 0.0], ["LEU 83", 0.0], ["SER 84", 0.0], ["GLY 85", 0.01], ["PHE 86", 0.38], ["VAL 87", 2.2], ["SER 88", 0.44], ["ALA 89", 0.0], ["PHE 90", 2.29], ["LEU 91", 1.92], ["PHE 92", 0.0], ["SER 93", 0.0], ["ILE 94", 0.03], ["GLU 95", 0.0], ["THR 96", 0.0], ["GLU 97", 0.0], ["THR 98", 0.0], ["THR 99", 0.0], ["ILE 100", 0.0], ["GLY 101", 0.0], ["TYR 102", 0.0], ["GLY 103", 0.0], ["PHE 104", 0.0], ["ARG 105", 0.21], ["VAL 106", 0.0], ["ILE 107", 0.0], ["THR 108", 0.0], ["GLU 109", 0.03], ["LYS 110", 0.23], ["CYS 111", 0.33], ["PRO 112", 1.67], ["GLU 113", 3.01], ["GLY 114", 1.29], ["ILE 115", 0.37], ["ILE 116", 3.38], ["LEU 117", 3.91], ["LEU 118", 0.03], ["LEU 119", 1.14], ["VAL 120", 4.01], ["GLN 121", 1.71], ["ALA 122", 0.02], ["ILE 123", 1.76], ["LEU 124", 2.89], ["GLY 125", 0.3], ["SER 126", 0.19], ["ILE 127", 2.44], ["VAL 128", 1.0], ["ASN 129", 0.17], ["ALA 130", 0.02], ["PHE 131", 0.86], ["MET 132", 0.55], ["VAL 133", 0.0], ["GLY 134", 0.0], ["CYS 135", 0.06], ["MET 136", 0.11], ["PHE 137", 0.0], ["VAL 138", 0.0], ["LYS 139", 0.03], ["ILE 140", 0.01], ["SER 141", 0.0], ["GLN 142", 0.0], ["PRO 143", 0.0], ["LYS 144", 0.13], ["LYS 145", 0.12], ["ARG 146", 0.0], ["ALA 147", 0.0], ["GLU 148", 0.0], ["THR 149", 0.0], ["LEU 150", 0.0], ["MET 151", 0.0], ["PHE 152", 0.0], ["SER 153", 0.0], ["ASN 154", 0.0], ["ASN 155", 0.0], ["ALA 156", 0.0], ["VAL 157", 0.0], ["ILE 158", 0.0], ["SER 159", 0.0], ["MET 160", 0.0], ["ARG 161", 0.0], ["ASP 162", 0.0], ["GLU 163", 0.0], ["LYS 164", 0.0], ["LEU 165", 0.0], ["CYS 166", 0.0], ["LEU 167", 0.0], ["MET 168", 0.0], ["PHE 169", 0.0], ["ARG 170", 0.0], ["VAL 171", 0.0], ["GLY 172", 
0.0], ["ASP 173", 0.0], ["LEU 174", 0.0], ["ARG 175", 0.06], ["ASN 176", 0.0], ["SER 177", 0.0], ["HIS 178", 0.0], ["ILE 179", 0.0], ["VAL 180", 0.0], ["GLU 181", 0.0], ["ALA 182", 0.0], ["SER 183", 0.0], ["ILE 184", 0.0], ["ARG 185", 0.0], ["ALA 186", 0.0], ["LYS 187", 0.0], ["LEU 188", 0.0], ["ILE 189", 0.0], ["LYS 190", 0.0], ["SER 191", 0.0], ["ARG 192", 0.0], ["GLN 193", 0.0], ["THR 194", 0.0], ["LYS 195", 0.0], ["GLU 196", 0.0], ["GLY 197", 0.0], ["GLU 198", 0.0], ["PHE 199", 0.0], ["ILE 200", 0.0], ["PRO 201", 0.0], ["LEU 202", 0.0], ["ASN 203", 0.0], ["GLN 204", 0.0], ["THR 205", 0.0], ["ASP 206", 0.0], ["ILE 207", 0.0], ["ASN 208", 0.0], ["VAL 209", 0.0], ["GLY 210", 0.0], ["PHE 211", 0.0], ["ASP 212", 0.0], ["THR 213", 0.0], ["GLY 214", 0.0], ["ASP 215", 0.0], ["ASP 216", 0.0], ["ARG 217", 0.0], ["LEU 218", 0.0], ["PHE 219", 0.0], ["LEU 220", 0.0], ["VAL 221", 0.0], ["SER 222", 0.0], ["PRO 223", 0.0], ["LEU 224", 0.0], ["ILE 225", 0.0], ["ILE 226", 0.0], ["SER 227", 0.0], ["HIS 228", 0.0], ["GLU 229", 0.0], ["ILE 230", 0.0], ["ASN 231", 0.0], ["GLU 232", 0.0], ["LYS 233", 0.0], ["SER 234", 0.0], ["PRO 235", 0.0], ["PHE 236", 0.0], ["TRP 237", 0.0], ["GLU 238", 0.0], ["MET 239", 0.0], ["SER 240", 0.0], ["ARG 241", 0.0], ["ALA 242", 0.0], ["GLN 243", 0.0], ["LEU 244", 0.0], ["GLU 245", 0.0], ["GLN 246", 0.0], ["GLU 247", 0.0], ["GLU 248", 0.0], ["PHE 249", 0.0], ["GLU 250", 0.0], ["VAL 251", 0.0], ["VAL 252", 0.0], ["VAL 253", 0.0], ["ILE 254", 0.0], ["LEU 255", 0.0], ["GLU 256", 0.0], ["GLY 257", 0.0], ["MET 258", 0.0], ["VAL 259", 0.0], ["GLU 260", 0.0], ["ALA 261", 0.0], ["THR 262", 0.0], ["GLY 263", 0.0], ["MET 264", 0.0], ["THR 265", 0.0], ["CYS 266", 0.0], ["GLN 267", 0.0], ["ALA 268", 0.0], ["ARG 269", 0.0], ["SER 270", 0.0], ["SER 271", 0.0], ["TYR 272", 0.0], ["MET 273", 0.0], ["ASP 274", 0.0], ["THR 275", 0.0], ["GLU 276", 0.0], ["VAL 277", 0.0], ["LEU 278", 0.0], ["TRP 279", 0.0], ["GLY 280", 0.0], ["HIS 281", 0.0], ["ARG 282", 0.0], ["PHE 283", 
0.0], ["THR 284", 0.0], ["PRO 285", 0.0], ["VAL 286", 0.0], ["LEU 287", 0.0], ["THR 288", 0.0], ["LEU 289", 0.0], ["GLU 290", 0.0], ["LYS 291", 0.0], ["GLY 292", 0.0], ["PHE 293", 0.0], ["TYR 294", 0.0], ["GLU 295", 0.0], ["VAL 296", 0.0], ["ASP 297", 0.0], ["TYR 298", 0.0], ["ASN 299", 0.0], ["THR 300", 0.0], ["PHE 301", 0.0], ["HIS 302", 0.0], ["ASP 303", 0.0], ["THR 304", 0.0], ["TYR 305", 0.0], ["GLU 306", 0.0], ["THR 307", 0.0], ["ASN 308", 0.0], ["THR 309", 0.0], ["PRO 310", 0.0], ["SER 311", 0.0], ["CYS 312", 0.0], ["CYS 313", 0.0], ["ALA 314", 0.0], ["LYS 315", 0.0], ["GLU 316", 0.0], ["LEU 317", 0.0], ["ALA 318", 0.0], ["GLU 319", 0.0], ["MET 320", 0.0], ["ARG 321", 0.0], ["GLN 322", 0.0], ["ARG 323", 0.0], ["TYR 324", 0.0], ["MET 325", 0.0], ["GLU 326", 0.0], ["LYS 327", 0.0], ["THR 328", 0.0], ["GLY 329", 0.0], ["LYS 330", 0.0], ["CYS 331", 0.0], ["ASN 332", 0.0], ["VAL 333", 0.0], ["HIS 334", 0.0], ["HIS 335", 0.0], ["GLY 336", 0.01], ["ASN 337", 0.09], ["VAL 338", 0.63], ["GLN 339", 0.66], ["GLU 340", 0.36], ["THR 341", 1.43], ["TYR 342", 1.59], ["ARG 343", 0.1], ["TYR 344", 0.59], ["LEU 345", 1.55], ["SER 346", 0.79], ["ASP 347", 0.0], ["LEU 348", 0.93], ["PHE 349", 1.87], ["THR 350", 0.68], ["THR 351", 0.02], ["LEU 352", 0.96], ["VAL 353", 0.0], ["ASP 354", 0.01], ["LEU 355", 0.72], ["LYS 356", 0.43], ["TRP 357", 0.48], ["ARG 358", 1.69], ["PHE 359", 2.23], ["ASN 360", 0.19], ["LEU 361", 1.83], ["LEU 362", 4.23], ["VAL 363", 2.0], ["PHE 364", 0.06], ["THR 365", 2.51], ["MET 366", 4.1], ["VAL 367", 1.5], ["TYR 368", 0.21], ["THR 369", 1.65], ["ILE 370", 2.4], ["THR 371", 0.8], ["TRP 372", 0.1], ["LEU 373", 0.97], ["PHE 374", 2.6], ["PHE 375", 1.24], ["GLY 376", 0.0], ["PHE 377", 1.14], ["ILE 378", 2.31], ["TRP 379", 0.0], ["TRP 380", 0.17], ["LEU 381", 2.06], ["ILE 382", 2.07], ["ALA 383", 0.28], ["TYR 384", 0.93], ["VAL 385", 2.54], ["ARG 386", 2.36], ["GLY 387", 0.4], ["ASP 388", 0.04], ["LEU 389", 0.0], ["ASP 390", 0.02], ["HIS 391", 0.01], ["VAL 
392", 0.0], ["GLY 393", 0.0], ["ASP 394", 0.0], ["GLN 395", 0.0], ["GLU 396", 0.0], ["TRP 397", 0.36], ["ILE 398", 0.0], ["PRO 399", 0.0], ["CYS 400", 0.93], ["VAL 401", 0.0], ["GLU 402", 0.0], ["ASN 403", 0.0], ["LEU 404", 0.0], ["SER 405", 0.0], ["GLY 406", 0.03], ["PHE 407", 0.56], ["VAL 408", 0.41], ["SER 409", 0.0], ["ALA 410", 0.0], ["PHE 411", 0.53], ["LEU 412", 0.03], ["PHE 413", 0.0], ["SER 414", 0.0], ["ILE 415", 0.02], ["GLU 416", 0.0], ["THR 417", 0.0], ["GLU 418", 0.0], ["THR 419", 0.0], ["THR 420", 0.0], ["ILE 421", 0.0], ["GLY 422", 0.0], ["TYR 423", 0.0], ["GLY 424", 0.0], ["PHE 425", 0.0], ["ARG 426", 0.0], ["VAL 427", 0.0], ["ILE 428", 0.0], ["THR 429", 0.0], ["GLU 430", 0.53], ["LYS 431", 2.08], ["CYS 432", 0.31], ["PRO 433", 2.96], ["GLU 434", 2.57], ["GLY 435", 0.24], ["ILE 436", 1.92], ["ILE 437", 4.59], ["LEU 438", 2.28], ["LEU 439", 0.24], ["LEU 440", 3.18], ["VAL 441", 3.63], ["GLN 442", 0.01], ["ALA 443", 0.02], ["ILE 444", 1.99], ["LEU 445", 2.03], ["GLY 446", 0.0], ["SER 447", 0.0], ["ILE 448", 0.44], ["VAL 449", 0.56], ["ASN 450", 0.0], ["ALA 451", 0.01], ["PHE 452", 0.66], ["MET 453", 0.15], ["VAL 454", 0.0], ["GLY 455", 0.0], ["CYS 456", 0.01], ["MET 457", 0.02], ["PHE 458", 0.0], ["VAL 459", 0.0], ["LYS 460", 0.0], ["ILE 461", 0.01], ["SER 462", 0.0], ["GLN 463", 0.0], ["PRO 464", 0.0], ["LYS 465", 0.0], ["LYS 466", 0.13], ["ARG 467", 0.0], ["ALA 468", 0.0], ["GLU 469", 0.0], ["THR 470", 0.0], ["LEU 471", 0.0], ["MET 472", 0.0], ["PHE 473", 0.0], ["SER 474", 0.0], ["ASN 475", 0.0], ["ASN 476", 0.0], ["ALA 477", 0.0], ["VAL 478", 0.0], ["ILE 479", 0.0], ["SER 480", 0.0], ["MET 481", 0.0], ["ARG 482", 0.0], ["ASP 483", 0.0], ["GLU 484", 0.0], ["LYS 485", 0.0], ["LEU 486", 0.0], ["CYS 487", 0.0], ["LEU 488", 0.0], ["MET 489", 0.0], ["PHE 490", 0.0], ["ARG 491", 0.0], ["VAL 492", 0.0], ["GLY 493", 0.0], ["ASP 494", 0.0], ["LEU 495", 0.0], ["ARG 496", 0.28], ["ASN 497", 0.0], ["SER 498", 0.0], ["HIS 499", 0.0], ["ILE 500", 0.0], ["VAL 
501", 0.0], ["GLU 502", 0.0], ["ALA 503", 0.0], ["SER 504", 0.0], ["ILE 505", 0.0], ["ARG 506", 0.0], ["ALA 507", 0.0], ["LYS 508", 0.0], ["LEU 509", 0.0], ["ILE 510", 0.0], ["LYS 511", 0.0], ["SER 512", 0.0], ["ARG 513", 0.0], ["GLN 514", 0.0], ["THR 515", 0.0], ["LYS 516", 0.0], ["GLU 517", 0.0], ["GLY 518", 0.0], ["GLU 519", 0.0], ["PHE 520", 0.0], ["ILE 521", 0.0], ["PRO 522", 0.0], ["LEU 523", 0.0], ["ASN 524", 0.0], ["GLN 525", 0.0], ["THR 526", 0.0], ["ASP 527", 0.0], ["ILE 528", 0.0], ["ASN 529", 0.0], ["VAL 530", 0.0], ["GLY 531", 0.0], ["PHE 532", 0.0], ["ASP 533", 0.0], ["THR 534", 0.0], ["GLY 535", 0.0], ["ASP 536", 0.0], ["ASP 537", 0.0], ["ARG 538", 0.0], ["LEU 539", 0.0], ["PHE 540", 0.0], ["LEU 541", 0.0], ["VAL 542", 0.0], ["SER 543", 0.0], ["PRO 544", 0.0], ["LEU 545", 0.0], ["ILE 546", 0.0], ["ILE 547", 0.0], ["SER 548", 0.0], ["HIS 549", 0.0], ["GLU 550", 0.0], ["ILE 551", 0.0], ["ASN 552", 0.0], ["GLU 553", 0.0], ["LYS 554", 0.0], ["SER 555", 0.0], ["PRO 556", 0.0], ["PHE 557", 0.0], ["TRP 558", 0.0], ["GLU 559", 0.0], ["MET 560", 0.0], ["SER 561", 0.0], ["ARG 562", 0.0], ["ALA 563", 0.0], ["GLN 564", 0.0], ["LEU 565", 0.0], ["GLU 566", 0.0], ["GLN 567", 0.0], ["GLU 568", 0.0], ["GLU 569", 0.0], ["PHE 570", 0.0], ["GLU 571", 0.0], ["VAL 572", 0.0], ["VAL 573", 0.0], ["VAL 574", 0.0], ["ILE 575", 0.0], ["LEU 576", 0.0], ["GLU 577", 0.0], ["GLY 578", 0.0], ["MET 579", 0.0], ["VAL 580", 0.0], ["GLU 581", 0.0], ["ALA 582", 0.0], ["THR 583", 0.0], ["GLY 584", 0.0], ["MET 585", 0.0], ["THR 586", 0.0], ["CYS 587", 0.0], ["GLN 588", 0.0], ["ALA 589", 0.0], ["ARG 590", 0.0], ["SER 591", 0.0], ["SER 592", 0.0], ["TYR 593", 0.0], ["MET 594", 0.0], ["ASP 595", 0.0], ["THR 596", 0.0], ["GLU 597", 0.0], ["VAL 598", 0.0], ["LEU 599", 0.0], ["TRP 600", 0.0], ["GLY 601", 0.0], ["HIS 602", 0.0], ["ARG 603", 0.0], ["PHE 604", 0.0], ["THR 605", 0.0], ["PRO 606", 0.0], ["VAL 607", 0.0], ["LEU 608", 0.0], ["THR 609", 0.0], ["LEU 610", 0.0], ["GLU 611", 0.0], ["LYS 
612", 0.1], ["GLY 613", 0.0], ["PHE 614", 0.01], ["TYR 615", 0.0], ["GLU 616", 0.0], ["VAL 617", 0.0], ["ASP 618", 0.01], ["TYR 619", 0.02], ["ASN 620", 0.0], ["THR 621", 0.0], ["PHE 622", 0.0], ["HIS 623", 0.0], ["ASP 624", 0.0], ["THR 625", 0.0], ["TYR 626", 0.0], ["GLU 627", 0.0], ["THR 628", 0.0], ["ASN 629", 0.0], ["THR 630", 0.0], ["PRO 631", 0.0], ["SER 632", 0.0], ["CYS 633", 0.0], ["CYS 634", 0.0], ["ALA 635", 0.0], ["LYS 636", 0.0], ["GLU 637", 0.0], ["LEU 638", 0.0], ["ALA 639", 0.0], ["GLU 640", 0.0], ["MET 641", 0.0], ["ARG 642", 0.0], ["GLN 643", 0.0], ["ARG 644", 0.0], ["TYR 645", 0.0], ["MET 646", 0.0], ["GLU 647", 0.0], ["LYS 648", 0.0], ["THR 649", 0.0], ["GLY 650", 0.0], ["LYS 651", 0.0], ["CYS 652", 0.0], ["ASN 653", 0.0], ["VAL 654", 0.0], ["HIS 655", 0.05], ["HIS 656", 0.0], ["GLY 657", 0.03], ["ASN 658", 0.78], ["VAL 659", 0.91], ["GLN 660", 0.38], ["GLU 661", 0.78], ["THR 662", 2.13], ["TYR 663", 1.65], ["ARG 664", 0.06], ["TYR 665", 1.2], ["LEU 666", 2.14], ["SER 667", 0.28], ["ASP 668", 0.04], ["LEU 669", 1.67], ["PHE 670", 0.02], ["THR 671", 0.0], ["THR 672", 1.11], ["LEU 673", 1.27], ["VAL 674", 0.01], ["ASP 675", 0.12], ["LEU 676", 1.42], ["LYS 677", 0.74], ["TRP 678", 1.19], ["ARG 679", 1.6], ["PHE 680", 1.83], ["ASN 681", 0.56], ["LEU 682", 1.24], ["LEU 683", 3.18], ["VAL 684", 2.24], ["PHE 685", 0.56], ["THR 686", 1.85], ["MET 687", 3.23], ["VAL 688", 2.14], ["TYR 689", 0.05], ["THR 690", 1.87], ["ILE 691", 2.11], ["THR 692", 1.29], ["TRP 693", 0.02], ["LEU 694", 0.8], ["PHE 695", 2.93], ["PHE 696", 1.18], ["GLY 697", 0.0], ["PHE 698", 1.49], ["ILE 699", 2.36], ["TRP 700", 0.0], ["TRP 701", 0.17], ["LEU 702", 1.57], ["ILE 703", 2.29], ["ALA 704", 0.02], ["TYR 705", 0.85], ["VAL 706", 3.12], ["ARG 707", 2.28], ["GLY 708", 0.55], ["ASP 709", 0.0], ["LEU 710", 0.0], ["ASP 711", 0.01], ["HIS 712", 0.0], ["VAL 713", 0.01], ["GLY 714", 0.01], ["ASP 715", 0.01], ["GLN 716", 0.02], ["GLU 717", 0.03], ["TRP 718", 0.15], ["ILE 719", 0.0], 
["PRO 720", 0.0], ["CYS 721", 0.58], ["VAL 722", 0.0], ["GLU 723", 0.0], ["ASN 724", 0.0], ["LEU 725", 0.0], ["SER 726", 0.0], ["GLY 727", 0.05], ["PHE 728", 0.26], ["VAL 729", 1.21], ["SER 730", 0.02], ["ALA 731", 0.0], ["PHE 732", 1.77], ["LEU 733", 1.46], ["PHE 734", 0.0], ["SER 735", 0.0], ["ILE 736", 0.43], ["GLU 737", 0.0], ["THR 738", 0.0], ["GLU 739", 0.0], ["THR 740", 0.0], ["THR 741", 0.0], ["ILE 742", 0.0], ["GLY 743", 0.0], ["TYR 744", 0.0], ["GLY 745", 0.0], ["PHE 746", 0.0], ["ARG 747", 0.0], ["VAL 748", 0.0], ["ILE 749", 0.0], ["THR 750", 0.0], ["GLU 751", 0.0], ["LYS 752", 0.08], ["CYS 753", 0.27], ["PRO 754", 1.23], ["GLU 755", 2.84], ["GLY 756", 1.51], ["ILE 757", 0.04], ["ILE 758", 3.19], ["LEU 759", 2.48], ["LEU 760", 0.0], ["LEU 761", 1.43], ["VAL 762", 2.78], ["GLN 763", 0.01], ["ALA 764", 0.0], ["ILE 765", 0.5], ["LEU 766", 2.13], ["GLY 767", 0.46], ["SER 768", 0.0], ["ILE 769", 0.82], ["VAL 770", 1.69], ["ASN 771", 0.01], ["ALA 772", 0.0], ["PHE 773", 1.5], ["MET 774", 0.97], ["VAL 775", 0.0], ["GLY 776", 0.0], ["CYS 777", 0.01], ["MET 778", 0.05], ["PHE 779", 0.0], ["VAL 780", 0.0], ["LYS 781", 0.12], ["ILE 782", 0.79], ["SER 783", 0.05], ["GLN 784", 0.07], ["PRO 785", 0.03], ["LYS 786", 0.07], ["LYS 787", 0.39], ["ARG 788", 0.0], ["ALA 789", 0.0], ["GLU 790", 0.0], ["THR 791", 0.0], ["LEU 792", 0.0], ["MET 793", 0.0], ["PHE 794", 0.0], ["SER 795", 0.0], ["ASN 796", 0.0], ["ASN 797", 0.0], ["ALA 798", 0.0], ["VAL 799", 0.0], ["ILE 800", 0.0], ["SER 801", 0.0], ["MET 802", 0.0], ["ARG 803", 0.0], ["ASP 804", 0.0], ["GLU 805", 0.0], ["LYS 806", 0.0], ["LEU 807", 0.0], ["CYS 808", 0.0], ["LEU 809", 0.0], ["MET 810", 0.0], ["PHE 811", 0.0], ["ARG 812", 0.0], ["VAL 813", 0.0], ["GLY 814", 0.0], ["ASP 815", 0.0], ["LEU 816", 0.0], ["ARG 817", 0.0], ["ASN 818", 0.0], ["SER 819", 0.0], ["HIS 820", 0.0], ["ILE 821", 0.0], ["VAL 822", 0.0], ["GLU 823", 0.0], ["ALA 824", 0.0], ["SER 825", 0.0], ["ILE 826", 0.0], ["ARG 827", 0.0], ["ALA 828", 0.0], 
["LYS 829", 0.0], ["LEU 830", 0.0], ["ILE 831", 0.0], ["LYS 832", 0.0], ["SER 833", 0.0], ["ARG 834", 0.0], ["GLN 835", 0.0], ["THR 836", 0.0], ["LYS 837", 0.0], ["GLU 838", 0.0], ["GLY 839", 0.0], ["GLU 840", 0.0], ["PHE 841", 0.0], ["ILE 842", 0.0], ["PRO 843", 0.0], ["LEU 844", 0.0], ["ASN 845", 0.0], ["GLN 846", 0.0], ["THR 847", 0.0], ["ASP 848", 0.0], ["ILE 849", 0.0], ["ASN 850", 0.0], ["VAL 851", 0.0], ["GLY 852", 0.0], ["PHE 853", 0.0], ["ASP 854", 0.0], ["THR 855", 0.0], ["GLY 856", 0.0], ["ASP 857", 0.0], ["ASP 858", 0.0], ["ARG 859", 0.0], ["LEU 860", 0.0], ["PHE 861", 0.0], ["LEU 862", 0.0], ["VAL 863", 0.0], ["SER 864", 0.0], ["PRO 865", 0.0], ["LEU 866", 0.0], ["ILE 867", 0.0], ["ILE 868", 0.0], ["SER 869", 0.0], ["HIS 870", 0.0], ["GLU 871", 0.0], ["ILE 872", 0.0], ["ASN 873", 0.0], ["GLU 874", 0.0], ["LYS 875", 0.0], ["SER 876", 0.0], ["PRO 877", 0.0], ["PHE 878", 0.0], ["TRP 879", 0.0], ["GLU 880", 0.0], ["MET 881", 0.0], ["SER 882", 0.0], ["ARG 883", 0.0], ["ALA 884", 0.0], ["GLN 885", 0.0], ["LEU 886", 0.0], ["GLU 887", 0.0], ["GLN 888", 0.0], ["GLU 889", 0.0], ["GLU 890", 0.0], ["PHE 891", 0.0], ["GLU 892", 0.0], ["VAL 893", 0.0], ["VAL 894", 0.0], ["VAL 895", 0.0], ["ILE 896", 0.0], ["LEU 897", 0.0], ["GLU 898", 0.0], ["GLY 899", 0.0], ["MET 900", 0.0], ["VAL 901", 0.0], ["GLU 902", 0.0], ["ALA 903", 0.0], ["THR 904", 0.0], ["GLY 905", 0.0], ["MET 906", 0.0], ["THR 907", 0.0], ["CYS 908", 0.0], ["GLN 909", 0.0], ["ALA 910", 0.0], ["ARG 911", 0.0], ["SER 912", 0.0], ["SER 913", 0.0], ["TYR 914", 0.0], ["MET 915", 0.0], ["ASP 916", 0.0], ["THR 917", 0.0], ["GLU 918", 0.0], ["VAL 919", 0.0], ["LEU 920", 0.0], ["TRP 921", 0.0], ["GLY 922", 0.0], ["HIS 923", 0.0], ["ARG 924", 0.0], ["PHE 925", 0.0], ["THR 926", 0.0], ["PRO 927", 0.0], ["VAL 928", 0.0], ["LEU 929", 0.0], ["THR 930", 0.0], ["LEU 931", 0.0], ["GLU 932", 0.0], ["LYS 933", 0.0], ["GLY 934", 0.0], ["PHE 935", 0.0], ["TYR 936", 0.0], ["GLU 937", 0.0], ["VAL 938", 0.0], ["ASP 939", 0.0], 
["TYR 940", 0.0], ["ASN 941", 0.0], ["THR 942", 0.0], ["PHE 943", 0.0], ["HIS 944", 0.0], ["ASP 945", 0.0], ["THR 946", 0.0], ["TYR 947", 0.0], ["GLU 948", 0.0], ["THR 949", 0.0], ["ASN 950", 0.0], ["THR 951", 0.0], ["PRO 952", 0.0], ["SER 953", 0.0], ["CYS 954", 0.0], ["CYS 955", 0.0], ["ALA 956", 0.0], ["LYS 957", 0.0], ["GLU 958", 0.0], ["LEU 959", 0.0], ["ALA 960", 0.0], ["GLU 961", 0.0], ["MET 962", 0.0], ["ARG 963", 0.0], ["GLN 964", 0.0], ["ARG 965", 0.0], ["TYR 966", 0.0], ["MET 967", 0.0], ["GLU 968", 0.0], ["LYS 969", 0.0], ["THR 970", 0.0], ["GLY 971", 0.0], ["LYS 972", 0.0], ["CYS 973", 0.0], ["ASN 974", 0.0], ["VAL 975", 0.0], ["HIS 976", 0.0], ["HIS 977", 0.0], ["GLY 978", 0.0], ["ASN 979", 0.0], ["VAL 980", 0.0], ["GLN 981", 0.0], ["GLU 982", 0.11], ["THR 983", 0.05], ["TYR 984", 1.2], ["ARG 985", 0.64], ["TYR 986", 1.6], ["LEU 987", 1.83], ["SER 988", 0.85], ["ASP 989", 0.15], ["LEU 990", 1.94], ["PHE 991", 1.13], ["THR 992", 0.01], ["THR 993", 0.06], ["LEU 994", 0.7], ["VAL 995", 0.27], ["ASP 996", 0.0], ["LEU 997", 0.19], ["LYS 998", 0.55], ["TRP 999", 1.92], ["ARG 1000", 1.86], ["PHE 1001", 1.78], ["ASN 1002", 1.5], ["LEU 1003", 3.43], ["LEU 1004", 3.85], ["VAL 1005", 1.21], ["PHE 1006", 2.01], ["THR 1007", 3.53], ["MET 1008", 3.33], ["VAL 1009", 1.82], ["TYR 1010", 0.31], ["THR 1011", 1.7], ["ILE 1012", 2.73], ["THR 1013", 1.32], ["TRP 1014", 0.05], ["LEU 1015", 1.25], ["PHE 1016", 2.53], ["PHE 1017", 0.96], ["GLY 1018", 0.01], ["PHE 1019", 0.96], ["ILE 1020", 1.57], ["TRP 1021", 0.01], ["TRP 1022", 0.2], ["LEU 1023", 1.37], ["ILE 1024", 1.82], ["ALA 1025", 0.16], ["TYR 1026", 0.9], ["VAL 1027", 2.78], ["ARG 1028", 2.34], ["GLY 1029", 0.71], ["ASP 1030", 0.0], ["LEU 1031", 0.0], ["ASP 1032", 0.0], ["HIS 1033", 0.0], ["VAL 1034", 0.0], ["GLY 1035", 0.0], ["ASP 1036", 0.0], ["GLN 1037", 0.0], ["GLU 1038", 0.0], ["TRP 1039", 0.01], ["ILE 1040", 0.02], ["PRO 1041", 0.0], ["CYS 1042", 0.51], ["VAL 1043", 0.0], ["GLU 1044", 0.0], ["ASN 1045", 0.0], 
["LEU 1046", 0.0], ["SER 1047", 0.0], ["GLY 1048", 0.07], ["PHE 1049", 0.67], ["VAL 1050", 1.14], ["SER 1051", 0.01], ["ALA 1052", 0.0], ["PHE 1053", 1.21], ["LEU 1054", 0.44], ["PHE 1055", 0.0], ["SER 1056", 0.0], ["ILE 1057", 0.05], ["GLU 1058", 0.0], ["THR 1059", 0.0], ["GLU 1060", 0.0], ["THR 1061", 0.0], ["THR 1062", 0.0], ["ILE 1063", 0.0], ["GLY 1064", 0.0], ["TYR 1065", 0.0], ["GLY 1066", 0.0], ["PHE 1067", 0.0], ["ARG 1068", 0.0], ["VAL 1069", 0.0], ["ILE 1070", 0.0], ["THR 1071", 0.0], ["GLU 1072", 0.0], ["LYS 1073", 1.12], ["CYS 1074", 1.1], ["PRO 1075", 2.51], ["GLU 1076", 2.29], ["GLY 1077", 0.14], ["ILE 1078", 1.44], ["ILE 1079", 3.71], ["LEU 1080", 2.17], ["LEU 1081", 0.3], ["LEU 1082", 1.66], ["VAL 1083", 2.64], ["GLN 1084", 0.06], ["ALA 1085", 0.0], ["ILE 1086", 0.46], ["LEU 1087", 1.56], ["GLY 1088", 0.0], ["SER 1089", 0.0], ["ILE 1090", 0.84], ["VAL 1091", 1.2], ["ASN 1092", 0.0], ["ALA 1093", 0.01], ["PHE 1094", 1.65], ["MET 1095", 1.07], ["VAL 1096", 0.0], ["GLY 1097", 0.0], ["CYS 1098", 1.11], ["MET 1099", 1.45], ["PHE 1100", 0.0], ["VAL 1101", 0.0], ["LYS 1102", 0.85], ["ILE 1103", 0.24], ["SER 1104", 0.0], ["GLN 1105", 0.0], ["PRO 1106", 0.0], ["LYS 1107", 0.0], ["LYS 1108", 0.36], ["ARG 1109", 0.0], ["ALA 1110", 0.0], ["GLU 1111", 0.0], ["THR 1112", 0.0], ["LEU 1113", 0.0], ["MET 1114", 0.0], ["PHE 1115", 0.0], ["SER 1116", 0.0], ["ASN 1117", 0.0], ["ASN 1118", 0.0], ["ALA 1119", 0.0], ["VAL 1120", 0.0], ["ILE 1121", 0.0], ["SER 1122", 0.0], ["MET 1123", 0.0], ["ARG 1124", 0.0], ["ASP 1125", 0.0], ["GLU 1126", 0.0], ["LYS 1127", 0.0], ["LEU 1128", 0.0], ["CYS 1129", 0.0], ["LEU 1130", 0.0], ["MET 1131", 0.0], ["PHE 1132", 0.0], ["ARG 1133", 0.0], ["VAL 1134", 0.0], ["GLY 1135", 0.0], ["ASP 1136", 0.0], ["LEU 1137", 0.0], ["ARG 1138", 0.09], ["ASN 1139", 0.0], ["SER 1140", 0.0], ["HIS 1141", 0.0], ["ILE 1142", 0.0], ["VAL 1143", 0.0], ["GLU 1144", 0.0], ["ALA 1145", 0.0], ["SER 1146", 0.0], ["ILE 1147", 0.0], ["ARG 1148", 0.0], ["ALA 1149", 
0.0], ["LYS 1150", 0.0], ["LEU 1151", 0.0], ["ILE 1152", 0.0], ["LYS 1153", 0.0], ["SER 1154", 0.0], ["ARG 1155", 0.0], ["GLN 1156", 0.0], ["THR 1157", 0.0], ["LYS 1158", 0.0], ["GLU 1159", 0.0], ["GLY 1160", 0.0], ["GLU 1161", 0.0], ["PHE 1162", 0.0], ["ILE 1163", 0.0], ["PRO 1164", 0.0], ["LEU 1165", 0.0], ["ASN 1166", 0.0], ["GLN 1167", 0.0], ["THR 1168", 0.0], ["ASP 1169", 0.0], ["ILE 1170", 0.0], ["ASN 1171", 0.0], ["VAL 1172", 0.0], ["GLY 1173", 0.0], ["PHE 1174", 0.0], ["ASP 1175", 0.0], ["THR 1176", 0.0], ["GLY 1177", 0.0], ["ASP 1178", 0.0], ["ASP 1179", 0.0], ["ARG 1180", 0.0], ["LEU 1181", 0.0], ["PHE 1182", 0.0], ["LEU 1183", 0.0], ["VAL 1184", 0.0], ["SER 1185", 0.0], ["PRO 1186", 0.0], ["LEU 1187", 0.0], ["ILE 1188", 0.0], ["ILE 1189", 0.0], ["SER 1190", 0.0], ["HIS 1191", 0.0], ["GLU 1192", 0.0], ["ILE 1193", 0.0], ["ASN 1194", 0.0], ["GLU 1195", 0.0], ["LYS 1196", 0.0], ["SER 1197", 0.0], ["PRO 1198", 0.0], ["PHE 1199", 0.0], ["TRP 1200", 0.0], ["GLU 1201", 0.0], ["MET 1202", 0.0], ["SER 1203", 0.0], ["ARG 1204", 0.0], ["ALA 1205", 0.0], ["GLN 1206", 0.0], ["LEU 1207", 0.0], ["GLU 1208", 0.0], ["GLN 1209", 0.0], ["GLU 1210", 0.0], ["GLU 1211", 0.0], ["PHE 1212", 0.0], ["GLU 1213", 0.0], ["VAL 1214", 0.0], ["VAL 1215", 0.0], ["VAL 1216", 0.0], ["ILE 1217", 0.0], ["LEU 1218", 0.0], ["GLU 1219", 0.0], ["GLY 1220", 0.0], ["MET 1221", 0.0], ["VAL 1222", 0.0], ["GLU 1223", 0.0], ["ALA 1224", 0.0], ["THR 1225", 0.0], ["GLY 1226", 0.0], ["MET 1227", 0.0], ["THR 1228", 0.0], ["CYS 1229", 0.0], ["GLN 1230", 0.0], ["ALA 1231", 0.0], ["ARG 1232", 0.0], ["SER 1233", 0.0], ["SER 1234", 0.0], ["TYR 1235", 0.0], ["MET 1236", 0.0], ["ASP 1237", 0.0], ["THR 1238", 0.0], ["GLU 1239", 0.0], ["VAL 1240", 0.0], ["LEU 1241", 0.0], ["TRP 1242", 0.0], ["GLY 1243", 0.0], ["HIS 1244", 0.0], ["ARG 1245", 0.0], ["PHE 1246", 0.0], ["THR 1247", 0.0], ["PRO 1248", 0.0], ["VAL 1249", 0.0], ["LEU 1250", 0.0], ["THR 1251", 0.0], ["LEU 1252", 0.0], ["GLU 1253", 0.0], ["LYS 1254", 
0.0], ["GLY 1255", 0.0], ["PHE 1256", 0.0], ["TYR 1257", 0.0], ["GLU 1258", 0.0], ["VAL 1259", 0.0], ["ASP 1260", 0.0], ["TYR 1261", 0.0], ["ASN 1262", 0.0], ["THR 1263", 0.0], ["PHE 1264", 0.0], ["HIS 1265", 0.0], ["ASP 1266", 0.0], ["THR 1267", 0.0], ["TYR 1268", 0.0], ["GLU 1269", 0.0], ["THR 1270", 0.0], ["ASN 1271", 0.0], ["THR 1272", 0.0], ["PRO 1273", 0.0], ["SER 1274", 0.0], ["CYS 1275", 0.0], ["CYS 1276", 0.0], ["ALA 1277", 0.0], ["LYS 1278", 0.0], ["GLU 1279", 0.0], ["LEU 1280", 0.0], ["ALA 1281", 0.0], ["GLU 1282", 0.0], ["MET 1283", 0.0]], "POPE": [["ARG 0", 0.17], ["GLN 1", 0.0], ["ARG 2", 0.0], ["TYR 3", 0.0], ["MET 4", 0.0], ["GLU 5", 0.01], ["LYS 6", 0.0], ["THR 7", 0.01], ["GLY 8", 0.0], ["LYS 9", 0.12], ["CYS 10", 0.0], ["ASN 11", 0.01], ["VAL 12", 0.0], ["HIS 13", 0.34], ["HIS 14", 0.02], ["GLY 15", 0.5], ["ASN 16", 1.19], ["VAL 17", 0.24], ["GLN 18", 0.83], ["GLU 19", 0.18], ["THR 20", 1.36], ["TYR 21", 3.03], ["ARG 22", 2.23], ["TYR 23", 1.17], ["LEU 24", 1.66], ["SER 25", 0.92], ["ASP 26", 0.08], ["LEU 27", 0.71], ["PHE 28", 0.07], ["THR 29", 0.0], ["THR 30", 0.01], ["LEU 31", 0.59], ["VAL 32", 0.0], ["ASP 33", 0.01], ["LEU 34", 0.06], ["LYS 35", 1.33], ["TRP 36", 0.62], ["ARG 37", 1.69], ["PHE 38", 1.18], ["ASN 39", 0.01], ["LEU 40", 0.63], ["LEU 41", 0.98], ["VAL 42", 0.85], ["PHE 43", 0.64], ["THR 44", 0.36], ["MET 45", 1.47], ["VAL 46", 1.08], ["TYR 47", 0.03], ["THR 48", 0.54], ["ILE 49", 1.67], ["THR 50", 0.33], ["TRP 51", 0.0], ["LEU 52", 1.24], ["PHE 53", 1.82], ["PHE 54", 0.08], ["GLY 55", 0.0], ["PHE 56", 2.74], ["ILE 57", 0.49], ["TRP 58", 0.0], ["TRP 59", 1.82], ["LEU 60", 1.68], ["ILE 61", 0.19], ["ALA 62", 0.0], ["TYR 63", 2.41], ["VAL 64", 1.56], ["ARG 65", 2.1], ["GLY 66", 1.27], ["ASP 67", 0.46], ["LEU 68", 0.31], ["ASP 69", 1.51], ["HIS 70", 0.57], ["VAL 71", 0.03], ["GLY 72", 0.0], ["ASP 73", 0.02], ["GLN 74", 0.0], ["GLU 75", 0.07], ["TRP 76", 0.03], ["ILE 77", 0.11], ["PRO 78", 0.03], ["CYS 79", 0.03], ["VAL 80", 0.0], 
["GLU 81", 0.0], ["ASN 82", 0.0], ["LEU 83", 0.0], ["SER 84", 0.5], ["GLY 85", 0.73], ["PHE 86", 1.36], ["VAL 87", 0.84], ["SER 88", 0.32], ["ALA 89", 0.0], ["PHE 90", 0.54], ["LEU 91", 0.0], ["PHE 92", 0.0], ["SER 93", 0.0], ["ILE 94", 0.0], ["GLU 95", 0.0], ["THR 96", 0.0], ["GLU 97", 0.0], ["THR 98", 0.0], ["THR 99", 0.0], ["ILE 100", 0.0], ["GLY 101", 0.0], ["TYR 102", 0.0], ["GLY 103", 0.0], ["PHE 104", 0.0], ["ARG 105", 0.01], ["VAL 106", 0.0], ["ILE 107", 0.0], ["THR 108", 0.0], ["GLU 109", 0.0], ["LYS 110", 0.54], ["CYS 111", 0.7], ["PRO 112", 1.01], ["GLU 113", 1.37], ["GLY 114", 0.01], ["ILE 115", 0.01], ["ILE 116", 1.11], ["LEU 117", 0.56], ["LEU 118", 0.0], ["LEU 119", 0.26], ["VAL 120", 0.86], ["GLN 121", 0.08], ["ALA 122", 0.0], ["ILE 123", 0.03], ["LEU 124", 0.34], ["GLY 125", 0.01], ["SER 126", 0.0], ["ILE 127", 0.18], ["VAL 128", 0.33], ["ASN 129", 0.0], ["ALA 130", 0.0], ["PHE 131", 0.64], ["MET 132", 0.01], ["VAL 133", 0.0], ["GLY 134", 0.0], ["CYS 135", 0.0], ["MET 136", 0.0], ["PHE 137", 0.0], ["VAL 138", 0.0], ["LYS 139", 0.51], ["ILE 140", 0.01], ["SER 141", 0.0], ["GLN 142", 0.01], ["PRO 143", 0.0], ["LYS 144", 0.75], ["LYS 145", 0.75], ["ARG 146", 0.0], ["ALA 147", 0.01], ["GLU 148", 0.64], ["THR 149", 0.01], ["LEU 150", 0.0], ["MET 151", 0.01], ["PHE 152", 0.0], ["SER 153", 0.0], ["ASN 154", 0.0], ["ASN 155", 0.0], ["ALA 156", 0.0], ["VAL 157", 0.0], ["ILE 158", 0.0], ["SER 159", 0.0], ["MET 160", 0.0], ["ARG 161", 0.0], ["ASP 162", 0.0], ["GLU 163", 0.0], ["LYS 164", 0.0], ["LEU 165", 0.0], ["CYS 166", 0.0], ["LEU 167", 0.0], ["MET 168", 0.0], ["PHE 169", 0.0], ["ARG 170", 0.0], ["VAL 171", 0.0], ["GLY 172", 0.0], ["ASP 173", 0.0], ["LEU 174", 0.0], ["ARG 175", 0.63], ["ASN 176", 0.08], ["SER 177", 0.01], ["HIS 178", 0.0], ["ILE 179", 0.0], ["VAL 180", 0.0], ["GLU 181", 0.0], ["ALA 182", 0.0], ["SER 183", 0.0], ["ILE 184", 0.0], ["ARG 185", 0.0], ["ALA 186", 0.0], ["LYS 187", 0.0], ["LEU 188", 0.0], ["ILE 189", 0.0], ["LYS 190", 0.0], 
["SER 191", 0.0], ["ARG 192", 0.0], ["GLN 193", 0.0], ["THR 194", 0.0], ["LYS 195", 0.0], ["GLU 196", 0.0], ["GLY 197", 0.0], ["GLU 198", 0.0], ["PHE 199", 0.0], ["ILE 200", 0.0], ["PRO 201", 0.0], ["LEU 202", 0.0], ["ASN 203", 0.0], ["GLN 204", 0.0], ["THR 205", 0.0], ["ASP 206", 0.0], ["ILE 207", 0.0], ["ASN 208", 0.0], ["VAL 209", 0.0], ["GLY 210", 0.0], ["PHE 211", 0.0], ["ASP 212", 0.0], ["THR 213", 0.0], ["GLY 214", 0.0], ["ASP 215", 0.0], ["ASP 216", 0.0], ["ARG 217", 0.0], ["LEU 218", 0.0], ["PHE 219", 0.0], ["LEU 220", 0.0], ["VAL 221", 0.0], ["SER 222", 0.0], ["PRO 223", 0.0], ["LEU 224", 0.0], ["ILE 225", 0.0], ["ILE 226", 0.0], ["SER 227", 0.0], ["HIS 228", 0.0], ["GLU 229", 0.0], ["ILE 230", 0.0], ["ASN 231", 0.0], ["GLU 232", 0.0], ["LYS 233", 0.0], ["SER 234", 0.0], ["PRO 235", 0.0], ["PHE 236", 0.0], ["TRP 237", 0.0], ["GLU 238", 0.0], ["MET 239", 0.0], ["SER 240", 0.0], ["ARG 241", 0.0], ["ALA 242", 0.0], ["GLN 243", 0.0], ["LEU 244", 0.0], ["GLU 245", 0.0], ["GLN 246", 0.0], ["GLU 247", 0.0], ["GLU 248", 0.0], ["PHE 249", 0.0], ["GLU 250", 0.0], ["VAL 251", 0.0], ["VAL 252", 0.0], ["VAL 253", 0.0], ["ILE 254", 0.0], ["LEU 255", 0.0], ["GLU 256", 0.0], ["GLY 257", 0.0], ["MET 258", 0.0], ["VAL 259", 0.0], ["GLU 260", 0.0], ["ALA 261", 0.0], ["THR 262", 0.0], ["GLY 263", 0.0], ["MET 264", 0.0], ["THR 265", 0.0], ["CYS 266", 0.0], ["GLN 267", 0.0], ["ALA 268", 0.0], ["ARG 269", 0.01], ["SER 270", 0.0], ["SER 271", 0.0], ["TYR 272", 0.0], ["MET 273", 0.0], ["ASP 274", 0.0], ["THR 275", 0.0], ["GLU 276", 0.0], ["VAL 277", 0.0], ["LEU 278", 0.0], ["TRP 279", 0.0], ["GLY 280", 0.0], ["HIS 281", 0.0], ["ARG 282", 0.0], ["PHE 283", 0.0], ["THR 284", 0.0], ["PRO 285", 0.0], ["VAL 286", 0.0], ["LEU 287", 0.0], ["THR 288", 0.0], ["LEU 289", 0.0], ["GLU 290", 0.0], ["LYS 291", 0.0], ["GLY 292", 0.0], ["PHE 293", 0.51], ["TYR 294", 0.01], ["GLU 295", 0.41], ["VAL 296", 0.12], ["ASP 297", 0.37], ["TYR 298", 0.13], ["ASN 299", 0.11], ["THR 300", 0.0], ["PHE 301", 
0.0], ["HIS 302", 0.04], ["ASP 303", 0.0], ["THR 304", 0.0], ["TYR 305", 0.0], ["GLU 306", 0.0], ["THR 307", 0.0], ["ASN 308", 0.0], ["THR 309", 0.0], ["PRO 310", 0.0], ["SER 311", 0.0], ["CYS 312", 0.0], ["CYS 313", 0.0], ["ALA 314", 0.0], ["LYS 315", 0.0], ["GLU 316", 0.0], ["LEU 317", 0.0], ["ALA 318", 0.0], ["GLU 319", 0.0], ["MET 320", 0.0], ["ARG 321", 0.0], ["GLN 322", 0.0], ["ARG 323", 0.02], ["TYR 324", 0.0], ["MET 325", 0.0], ["GLU 326", 0.05], ["LYS 327", 0.02], ["THR 328", 0.09], ["GLY 329", 0.0], ["LYS 330", 0.49], ["CYS 331", 0.01], ["ASN 332", 0.04], ["VAL 333", 0.01], ["HIS 334", 1.17], ["HIS 335", 0.27], ["GLY 336", 0.59], ["ASN 337", 1.12], ["VAL 338", 1.48], ["GLN 339", 2.52], ["GLU 340", 1.7], ["THR 341", 2.65], ["TYR 342", 3.32], ["ARG 343", 0.75], ["TYR 344", 1.31], ["LEU 345", 2.01], ["SER 346", 0.9], ["ASP 347", 0.06], ["LEU 348", 0.69], ["PHE 349", 1.47], ["THR 350", 0.68], ["THR 351", 0.0], ["LEU 352", 0.19], ["VAL 353", 0.0], ["ASP 354", 0.01], ["LEU 355", 0.29], ["LYS 356", 0.96], ["TRP 357", 0.09], ["ARG 358", 1.54], ["PHE 359", 1.69], ["ASN 360", 0.0], ["LEU 361", 0.62], ["LEU 362", 2.02], ["VAL 363", 0.54], ["PHE 364", 0.0], ["THR 365", 0.85], ["MET 366", 2.38], ["VAL 367", 0.3], ["TYR 368", 0.02], ["THR 369", 1.4], ["ILE 370", 2.34], ["THR 371", 0.22], ["TRP 372", 0.03], ["LEU 373", 2.24], ["PHE 374", 2.6], ["PHE 375", 0.47], ["GLY 376", 0.0], ["PHE 377", 3.03], ["ILE 378", 0.92], ["TRP 379", 0.0], ["TRP 380", 1.96], ["LEU 381", 1.83], ["ILE 382", 0.11], ["ALA 383", 0.01], ["TYR 384", 2.9], ["VAL 385", 1.71], ["ARG 386", 1.99], ["GLY 387", 1.48], ["ASP 388", 0.27], ["LEU 389", 0.46], ["ASP 390", 1.48], ["HIS 391", 0.84], ["VAL 392", 0.01], ["GLY 393", 0.0], ["ASP 394", 0.08], ["GLN 395", 0.04], ["GLU 396", 0.19], ["TRP 397", 0.67], ["ILE 398", 0.02], ["PRO 399", 0.0], ["CYS 400", 0.01], ["VAL 401", 0.0], ["GLU 402", 0.0], ["ASN 403", 0.0], ["LEU 404", 0.0], ["SER 405", 0.3], ["GLY 406", 0.92], ["PHE 407", 2.24], ["VAL 408", 1.93], 
["SER 409", 0.11], ["ALA 410", 0.0], ["PHE 411", 1.55], ["LEU 412", 0.03], ["PHE 413", 0.0], ["SER 414", 0.0], ["ILE 415", 0.0], ["GLU 416", 0.0], ["THR 417", 0.0], ["GLU 418", 0.0], ["THR 419", 0.0], ["THR 420", 0.0], ["ILE 421", 0.0], ["GLY 422", 0.0], ["TYR 423", 0.0], ["GLY 424", 0.0], ["PHE 425", 0.0], ["ARG 426", 0.0], ["VAL 427", 0.0], ["ILE 428", 0.0], ["THR 429", 0.0], ["GLU 430", 0.03], ["LYS 431", 1.03], ["CYS 432", 0.41], ["PRO 433", 0.9], ["GLU 434", 0.79], ["GLY 435", 0.01], ["ILE 436", 0.0], ["ILE 437", 0.71], ["LEU 438", 0.26], ["LEU 439", 0.0], ["LEU 440", 0.15], ["VAL 441", 0.7], ["GLN 442", 0.0], ["ALA 443", 0.0], ["ILE 444", 0.19], ["LEU 445", 0.59], ["GLY 446", 0.0], ["SER 447", 0.0], ["ILE 448", 0.07], ["VAL 449", 0.14], ["ASN 450", 0.0], ["ALA 451", 0.0], ["PHE 452", 0.49], ["MET 453", 0.02], ["VAL 454", 0.0], ["GLY 455", 0.0], ["CYS 456", 0.0], ["MET 457", 0.0], ["PHE 458", 0.0], ["VAL 459", 0.0], ["LYS 460", 0.0], ["ILE 461", 0.0], ["SER 462", 0.0], ["GLN 463", 0.0], ["PRO 464", 0.0], ["LYS 465", 0.0], ["LYS 466", 0.0], ["ARG 467", 0.0], ["ALA 468", 0.0], ["GLU 469", 0.12], ["THR 470", 0.0], ["LEU 471", 0.0], ["MET 472", 0.0], ["PHE 473", 0.0], ["SER 474", 0.0], ["ASN 475", 0.01], ["ASN 476", 0.0], ["ALA 477", 0.0], ["VAL 478", 0.0], ["ILE 479", 0.0], ["SER 480", 0.0], ["MET 481", 0.0], ["ARG 482", 0.0], ["ASP 483", 0.0], ["GLU 484", 0.0], ["LYS 485", 0.0], ["LEU 486", 0.0], ["CYS 487", 0.0], ["LEU 488", 0.0], ["MET 489", 0.0], ["PHE 490", 0.0], ["ARG 491", 0.0], ["VAL 492", 0.0], ["GLY 493", 0.0], ["ASP 494", 0.0], ["LEU 495", 0.11], ["ARG 496", 0.94], ["ASN 497", 0.38], ["SER 498", 0.0], ["HIS 499", 0.0], ["ILE 500", 0.0], ["VAL 501", 0.0], ["GLU 502", 0.0], ["ALA 503", 0.0], ["SER 504", 0.0], ["ILE 505", 0.0], ["ARG 506", 0.0], ["ALA 507", 0.0], ["LYS 508", 0.0], ["LEU 509", 0.0], ["ILE 510", 0.0], ["LYS 511", 0.0], ["SER 512", 0.0], ["ARG 513", 0.0], ["GLN 514", 0.0], ["THR 515", 0.0], ["LYS 516", 0.0], ["GLU 517", 0.0], ["GLY 518", 
0.0], ["GLU 519", 0.0], ["PHE 520", 0.0], ["ILE 521", 0.0], ["PRO 522", 0.0], ["LEU 523", 0.0], ["ASN 524", 0.0], ["GLN 525", 0.0], ["THR 526", 0.0], ["ASP 527", 0.0], ["ILE 528", 0.0], ["ASN 529", 0.0], ["VAL 530", 0.0], ["GLY 531", 0.0], ["PHE 532", 0.0], ["ASP 533", 0.0], ["THR 534", 0.0], ["GLY 535", 0.0], ["ASP 536", 0.0], ["ASP 537", 0.0], ["ARG 538", 0.0], ["LEU 539", 0.0], ["PHE 540", 0.0], ["LEU 541", 0.0], ["VAL 542", 0.0], ["SER 543", 0.0], ["PRO 544", 0.0], ["LEU 545", 0.0], ["ILE 546", 0.0], ["ILE 547", 0.0], ["SER 548", 0.0], ["HIS 549", 0.0], ["GLU 550", 0.0], ["ILE 551", 0.0], ["ASN 552", 0.0], ["GLU 553", 0.0], ["LYS 554", 0.0], ["SER 555", 0.0], ["PRO 556", 0.0], ["PHE 557", 0.0], ["TRP 558", 0.0], ["GLU 559", 0.0], ["MET 560", 0.0], ["SER 561", 0.0], ["ARG 562", 0.0], ["ALA 563", 0.0], ["GLN 564", 0.0], ["LEU 565", 0.0], ["GLU 566", 0.0], ["GLN 567", 0.0], ["GLU 568", 0.0], ["GLU 569", 0.0], ["PHE 570", 0.0], ["GLU 571", 0.0], ["VAL 572", 0.0], ["VAL 573", 0.0], ["VAL 574", 0.0], ["ILE 575", 0.0], ["LEU 576", 0.0], ["GLU 577", 0.0], ["GLY 578", 0.0], ["MET 579", 0.0], ["VAL 580", 0.0], ["GLU 581", 0.0], ["ALA 582", 0.0], ["THR 583", 0.0], ["GLY 584", 0.0], ["MET 585", 0.0], ["THR 586", 0.0], ["CYS 587", 0.0], ["GLN 588", 0.0], ["ALA 589", 0.0], ["ARG 590", 0.0], ["SER 591", 0.0], ["SER 592", 0.0], ["TYR 593", 0.0], ["MET 594", 0.0], ["ASP 595", 0.0], ["THR 596", 0.0], ["GLU 597", 0.0], ["VAL 598", 0.0], ["LEU 599", 0.0], ["TRP 600", 0.0], ["GLY 601", 0.0], ["HIS 602", 0.0], ["ARG 603", 0.0], ["PHE 604", 0.0], ["THR 605", 0.0], ["PRO 606", 0.0], ["VAL 607", 0.0], ["LEU 608", 0.0], ["THR 609", 0.08], ["LEU 610", 0.01], ["GLU 611", 0.43], ["LYS 612", 1.37], ["GLY 613", 0.12], ["PHE 614", 1.6], ["TYR 615", 0.0], ["GLU 616", 1.08], ["VAL 617", 0.39], ["ASP 618", 1.26], ["TYR 619", 0.41], ["ASN 620", 0.63], ["THR 621", 0.01], ["PHE 622", 0.0], ["HIS 623", 0.41], ["ASP 624", 0.06], ["THR 625", 0.0], ["TYR 626", 0.01], ["GLU 627", 0.0], ["THR 628", 0.0], 
["ASN 629", 0.0], ["THR 630", 0.0], ["PRO 631", 0.0], ["SER 632", 0.0], ["CYS 633", 0.0], ["CYS 634", 0.0], ["ALA 635", 0.0], ["LYS 636", 0.0], ["GLU 637", 0.0], ["LEU 638", 0.0], ["ALA 639", 0.0], ["GLU 640", 0.0], ["MET 641", 0.0], ["ARG 642", 0.0], ["GLN 643", 0.0], ["ARG 644", 0.01], ["TYR 645", 0.0], ["MET 646", 0.0], ["GLU 647", 0.08], ["LYS 648", 0.02], ["THR 649", 0.06], ["GLY 650", 0.0], ["LYS 651", 0.46], ["CYS 652", 0.04], ["ASN 653", 0.08], ["VAL 654", 0.07], ["HIS 655", 2.83], ["HIS 656", 0.98], ["GLY 657", 1.3], ["ASN 658", 2.64], ["VAL 659", 1.71], ["GLN 660", 1.47], ["GLU 661", 2.22], ["THR 662", 2.7], ["TYR 663", 2.63], ["ARG 664", 0.53], ["TYR 665", 1.74], ["LEU 666", 1.73], ["SER 667", 0.21], ["ASP 668", 0.0], ["LEU 669", 0.62], ["PHE 670", 0.0], ["THR 671", 0.0], ["THR 672", 0.92], ["LEU 673", 0.54], ["VAL 674", 0.02], ["ASP 675", 0.73], ["LEU 676", 1.05], ["LYS 677", 2.14], ["TRP 678", 0.05], ["ARG 679", 1.56], ["PHE 680", 2.68], ["ASN 681", 0.04], ["LEU 682", 0.56], ["LEU 683", 1.85], ["VAL 684", 1.45], ["PHE 685", 0.03], ["THR 686", 0.45], ["MET 687", 2.54], ["VAL 688", 0.95], ["TYR 689", 0.0], ["THR 690", 0.85], ["ILE 691", 2.5], ["THR 692", 0.41], ["TRP 693", 0.0], ["LEU 694", 1.63], ["PHE 695", 3.05], ["PHE 696", 0.56], ["GLY 697", 0.0], ["PHE 698", 3.16], ["ILE 699", 1.41], ["TRP 700", 0.0], ["TRP 701", 2.1], ["LEU 702", 2.35], ["ILE 703", 0.43], ["ALA 704", 0.0], ["TYR 705", 2.98], ["VAL 706", 2.1], ["ARG 707", 2.18], ["GLY 708", 1.62], ["ASP 709", 0.53], ["LEU 710", 0.8], ["ASP 711", 1.11], ["HIS 712", 0.23], ["VAL 713", 0.09], ["GLY 714", 0.34], ["ASP 715", 0.48], ["GLN 716", 0.47], ["GLU 717", 0.58], ["TRP 718", 1.15], ["ILE 719", 0.11], ["PRO 720", 0.0], ["CYS 721", 0.0], ["VAL 722", 0.0], ["GLU 723", 0.0], ["ASN 724", 0.05], ["LEU 725", 0.05], ["SER 726", 0.51], ["GLY 727", 1.05], ["PHE 728", 1.9], ["VAL 729", 1.25], ["SER 730", 0.27], ["ALA 731", 0.0], ["PHE 732", 1.06], ["LEU 733", 0.43], ["PHE 734", 0.0], ["SER 735", 0.0], ["ILE 
736", 0.09], ["GLU 737", 0.0], ["THR 738", 0.0], ["GLU 739", 0.0], ["THR 740", 0.0], ["THR 741", 0.0], ["ILE 742", 0.0], ["GLY 743", 0.0], ["TYR 744", 0.0], ["GLY 745", 0.0], ["PHE 746", 0.0], ["ARG 747", 0.0], ["VAL 748", 0.0], ["ILE 749", 0.0], ["THR 750", 0.02], ["GLU 751", 0.03], ["LYS 752", 1.04], ["CYS 753", 1.0], ["PRO 754", 1.93], ["GLU 755", 1.7], ["GLY 756", 0.06], ["ILE 757", 0.01], ["ILE 758", 1.83], ["LEU 759", 0.65], ["LEU 760", 0.0], ["LEU 761", 0.95], ["VAL 762", 1.02], ["GLN 763", 0.0], ["ALA 764", 0.0], ["ILE 765", 0.17], ["LEU 766", 0.6], ["GLY 767", 0.08], ["SER 768", 0.0], ["ILE 769", 0.17], ["VAL 770", 0.3], ["ASN 771", 0.0], ["ALA 772", 0.0], ["PHE 773", 0.19], ["MET 774", 0.15], ["VAL 775", 0.0], ["GLY 776", 0.0], ["CYS 777", 0.0], ["MET 778", 0.0], ["PHE 779", 0.0], ["VAL 780", 0.0], ["LYS 781", 0.0], ["ILE 782", 0.0], ["SER 783", 0.0], ["GLN 784", 0.0], ["PRO 785", 0.0], ["LYS 786", 0.05], ["LYS 787", 0.47], ["ARG 788", 0.0], ["ALA 789", 0.0], ["GLU 790", 0.18], ["THR 791", 0.01], ["LEU 792", 0.0], ["MET 793", 0.01], ["PHE 794", 0.0], ["SER 795", 0.0], ["ASN 796", 0.0], ["ASN 797", 0.0], ["ALA 798", 0.0], ["VAL 799", 0.0], ["ILE 800", 0.0], ["SER 801", 0.0], ["MET 802", 0.0], ["ARG 803", 0.0], ["ASP 804", 0.0], ["GLU 805", 0.0], ["LYS 806", 0.0], ["LEU 807", 0.0], ["CYS 808", 0.0], ["LEU 809", 0.0], ["MET 810", 0.0], ["PHE 811", 0.0], ["ARG 812", 0.0], ["VAL 813", 0.0], ["GLY 814", 0.0], ["ASP 815", 0.0], ["LEU 816", 0.0], ["ARG 817", 0.08], ["ASN 818", 0.0], ["SER 819", 0.0], ["HIS 820", 0.0], ["ILE 821", 0.0], ["VAL 822", 0.0], ["GLU 823", 0.0], ["ALA 824", 0.0], ["SER 825", 0.0], ["ILE 826", 0.0], ["ARG 827", 0.0], ["ALA 828", 0.0], ["LYS 829", 0.0], ["LEU 830", 0.0], ["ILE 831", 0.0], ["LYS 832", 0.0], ["SER 833", 0.0], ["ARG 834", 0.0], ["GLN 835", 0.0], ["THR 836", 0.0], ["LYS 837", 0.0], ["GLU 838", 0.0], ["GLY 839", 0.0], ["GLU 840", 0.0], ["PHE 841", 0.0], ["ILE 842", 0.0], ["PRO 843", 0.0], ["LEU 844", 0.0], ["ASN 845", 0.0], 
["GLN 846", 0.0], ["THR 847", 0.0], ["ASP 848", 0.0], ["ILE 849", 0.0], ["ASN 850", 0.0], ["VAL 851", 0.0], ["GLY 852", 0.0], ["PHE 853", 0.0], ["ASP 854", 0.0], ["THR 855", 0.0], ["GLY 856", 0.0], ["ASP 857", 0.0], ["ASP 858", 0.0], ["ARG 859", 0.0], ["LEU 860", 0.0], ["PHE 861", 0.0], ["LEU 862", 0.0], ["VAL 863", 0.0], ["SER 864", 0.0], ["PRO 865", 0.0], ["LEU 866", 0.0], ["ILE 867", 0.0], ["ILE 868", 0.0], ["SER 869", 0.0], ["HIS 870", 0.0], ["GLU 871", 0.0], ["ILE 872", 0.0], ["ASN 873", 0.0], ["GLU 874", 0.0], ["LYS 875", 0.0], ["SER 876", 0.0], ["PRO 877", 0.0], ["PHE 878", 0.0], ["TRP 879", 0.0], ["GLU 880", 0.0], ["MET 881", 0.0], ["SER 882", 0.0], ["ARG 883", 0.0], ["ALA 884", 0.0], ["GLN 885", 0.0], ["LEU 886", 0.0], ["GLU 887", 0.0], ["GLN 888", 0.0], ["GLU 889", 0.0], ["GLU 890", 0.0], ["PHE 891", 0.0], ["GLU 892", 0.0], ["VAL 893", 0.0], ["VAL 894", 0.0], ["VAL 895", 0.0], ["ILE 896", 0.0], ["LEU 897", 0.0], ["GLU 898", 0.0], ["GLY 899", 0.0], ["MET 900", 0.0], ["VAL 901", 0.0], ["GLU 902", 0.0], ["ALA 903", 0.0], ["THR 904", 0.0], ["GLY 905", 0.0], ["MET 906", 0.0], ["THR 907", 0.0], ["CYS 908", 0.0], ["GLN 909", 0.0], ["ALA 910", 0.0], ["ARG 911", 0.0], ["SER 912", 0.0], ["SER 913", 0.0], ["TYR 914", 0.0], ["MET 915", 0.0], ["ASP 916", 0.0], ["THR 917", 0.0], ["GLU 918", 0.0], ["VAL 919", 0.0], ["LEU 920", 0.0], ["TRP 921", 0.0], ["GLY 922", 0.0], ["HIS 923", 0.0], ["ARG 924", 0.0], ["PHE 925", 0.0], ["THR 926", 0.0], ["PRO 927", 0.0], ["VAL 928", 0.0], ["LEU 929", 0.0], ["THR 930", 0.0], ["LEU 931", 0.0], ["GLU 932", 0.0], ["LYS 933", 0.0], ["GLY 934", 0.0], ["PHE 935", 0.0], ["TYR 936", 0.0], ["GLU 937", 0.0], ["VAL 938", 0.0], ["ASP 939", 0.0], ["TYR 940", 0.0], ["ASN 941", 0.01], ["THR 942", 0.0], ["PHE 943", 0.0], ["HIS 944", 0.0], ["ASP 945", 0.0], ["THR 946", 0.0], ["TYR 947", 0.0], ["GLU 948", 0.0], ["THR 949", 0.0], ["ASN 950", 0.0], ["THR 951", 0.0], ["PRO 952", 0.0], ["SER 953", 0.0], ["CYS 954", 0.0], ["CYS 955", 0.0], ["ALA 956", 0.0], 
["LYS 957", 0.0], ["GLU 958", 0.0], ["LEU 959", 0.0], ["ALA 960", 0.0], ["GLU 961", 0.0], ["MET 962", 0.0], ["ARG 963", 0.0], ["GLN 964", 0.0], ["ARG 965", 0.0], ["TYR 966", 0.0], ["MET 967", 0.0], ["GLU 968", 0.0], ["LYS 969", 0.0], ["THR 970", 0.02], ["GLY 971", 0.0], ["LYS 972", 0.03], ["CYS 973", 0.0], ["ASN 974", 0.0], ["VAL 975", 0.0], ["HIS 976", 0.0], ["HIS 977", 0.0], ["GLY 978", 0.02], ["ASN 979", 0.36], ["VAL 980", 0.1], ["GLN 981", 0.71], ["GLU 982", 0.74], ["THR 983", 0.8], ["TYR 984", 2.88], ["ARG 985", 1.67], ["TYR 986", 1.68], ["LEU 987", 2.09], ["SER 988", 0.82], ["ASP 989", 0.0], ["LEU 990", 0.38], ["PHE 991", 0.01], ["THR 992", 0.0], ["THR 993", 0.09], ["LEU 994", 0.22], ["VAL 995", 0.0], ["ASP 996", 0.07], ["LEU 997", 0.25], ["LYS 998", 1.32], ["TRP 999", 0.89], ["ARG 1000", 2.23], ["PHE 1001", 1.87], ["ASN 1002", 0.01], ["LEU 1003", 1.31], ["LEU 1004", 2.18], ["VAL 1005", 0.71], ["PHE 1006", 0.12], ["THR 1007", 0.67], ["MET 1008", 1.9], ["VAL 1009", 0.38], ["TYR 1010", 0.02], ["THR 1011", 0.62], ["ILE 1012", 1.53], ["THR 1013", 0.12], ["TRP 1014", 0.01], ["LEU 1015", 1.28], ["PHE 1016", 2.05], ["PHE 1017", 0.3], ["GLY 1018", 0.0], ["PHE 1019", 2.42], ["ILE 1020", 0.93], ["TRP 1021", 0.0], ["TRP 1022", 1.56], ["LEU 1023", 1.72], ["ILE 1024", 0.46], ["ALA 1025", 0.0], ["TYR 1026", 1.97], ["VAL 1027", 1.6], ["ARG 1028", 1.61], ["GLY 1029", 1.38], ["ASP 1030", 0.53], ["LEU 1031", 0.55], ["ASP 1032", 0.73], ["HIS 1033", 0.03], ["VAL 1034", 0.02], ["GLY 1035", 0.07], ["ASP 1036", 0.05], ["GLN 1037", 0.02], ["GLU 1038", 0.04], ["TRP 1039", 0.06], ["ILE 1040", 0.15], ["PRO 1041", 0.03], ["CYS 1042", 0.01], ["VAL 1043", 0.0], ["GLU 1044", 0.0], ["ASN 1045", 0.0], ["LEU 1046", 0.0], ["SER 1047", 0.27], ["GLY 1048", 0.8], ["PHE 1049", 1.26], ["VAL 1050", 0.83], ["SER 1051", 0.15], ["ALA 1052", 0.0], ["PHE 1053", 0.48], ["LEU 1054", 0.02], ["PHE 1055", 0.0], ["SER 1056", 0.0], ["ILE 1057", 0.0], ["GLU 1058", 0.0], ["THR 1059", 0.0], ["GLU 1060", 0.0], 
["THR 1061", 0.0], ["THR 1062", 0.0], ["ILE 1063", 0.0], ["GLY 1064", 0.0], ["TYR 1065", 0.0], ["GLY 1066", 0.0], ["PHE 1067", 0.0], ["ARG 1068", 0.0], ["VAL 1069", 0.0], ["ILE 1070", 0.0], ["THR 1071", 0.0], ["GLU 1072", 0.01], ["LYS 1073", 0.97], ["CYS 1074", 0.63], ["PRO 1075", 1.14], ["GLU 1076", 0.76], ["GLY 1077", 0.01], ["ILE 1078", 0.39], ["ILE 1079", 1.1], ["LEU 1080", 0.38], ["LEU 1081", 0.05], ["LEU 1082", 0.41], ["VAL 1083", 0.57], ["GLN 1084", 0.0], ["ALA 1085", 0.0], ["ILE 1086", 0.1], ["LEU 1087", 0.22], ["GLY 1088", 0.0], ["SER 1089", 0.0], ["ILE 1090", 0.01], ["VAL 1091", 0.01], ["ASN 1092", 0.0], ["ALA 1093", 0.0], ["PHE 1094", 0.14], ["MET 1095", 0.0], ["VAL 1096", 0.0], ["GLY 1097", 0.0], ["CYS 1098", 0.0], ["MET 1099", 0.01], ["PHE 1100", 0.0], ["VAL 1101", 0.0], ["LYS 1102", 0.14], ["ILE 1103", 0.01], ["SER 1104", 0.0], ["GLN 1105", 0.08], ["PRO 1106", 0.0], ["LYS 1107", 0.02], ["LYS 1108", 0.64], ["ARG 1109", 0.0], ["ALA 1110", 0.0], ["GLU 1111", 0.1], ["THR 1112", 0.0], ["LEU 1113", 0.0], ["MET 1114", 0.0], ["PHE 1115", 0.0], ["SER 1116", 0.0], ["ASN 1117", 0.0], ["ASN 1118", 0.0], ["ALA 1119", 0.0], ["VAL 1120", 0.0], ["ILE 1121", 0.0], ["SER 1122", 0.0], ["MET 1123", 0.0], ["ARG 1124", 0.0], ["ASP 1125", 0.0], ["GLU 1126", 0.0], ["LYS 1127", 0.0], ["LEU 1128", 0.0], ["CYS 1129", 0.0], ["LEU 1130", 0.0], ["MET 1131", 0.0], ["PHE 1132", 0.0], ["ARG 1133", 0.0], ["VAL 1134", 0.0], ["GLY 1135", 0.0], ["ASP 1136", 0.0], ["LEU 1137", 0.0], ["ARG 1138", 0.51], ["ASN 1139", 0.01], ["SER 1140", 0.0], ["HIS 1141", 0.0], ["ILE 1142", 0.0], ["VAL 1143", 0.0], ["GLU 1144", 0.0], ["ALA 1145", 0.0], ["SER 1146", 0.0], ["ILE 1147", 0.0], ["ARG 1148", 0.0], ["ALA 1149", 0.0], ["LYS 1150", 0.0], ["LEU 1151", 0.0], ["ILE 1152", 0.0], ["LYS 1153", 0.0], ["SER 1154", 0.0], ["ARG 1155", 0.0], ["GLN 1156", 0.0], ["THR 1157", 0.0], ["LYS 1158", 0.0], ["GLU 1159", 0.0], ["GLY 1160", 0.0], ["GLU 1161", 0.0], ["PHE 1162", 0.0], ["ILE 1163", 0.0], ["PRO 1164", 0.0], 
["LEU 1165", 0.0], ["ASN 1166", 0.0], ["GLN 1167", 0.0], ["THR 1168", 0.0], ["ASP 1169", 0.0], ["ILE 1170", 0.0], ["ASN 1171", 0.0], ["VAL 1172", 0.0], ["GLY 1173", 0.0], ["PHE 1174", 0.0], ["ASP 1175", 0.0], ["THR 1176", 0.0], ["GLY 1177", 0.0], ["ASP 1178", 0.0], ["ASP 1179", 0.0], ["ARG 1180", 0.0], ["LEU 1181", 0.0], ["PHE 1182", 0.0], ["LEU 1183", 0.0], ["VAL 1184", 0.0], ["SER 1185", 0.0], ["PRO 1186", 0.0], ["LEU 1187", 0.0], ["ILE 1188", 0.0], ["ILE 1189", 0.0], ["SER 1190", 0.0], ["HIS 1191", 0.0], ["GLU 1192", 0.0], ["ILE 1193", 0.0], ["ASN 1194", 0.0], ["GLU 1195", 0.0], ["LYS 1196", 0.0], ["SER 1197", 0.0], ["PRO 1198", 0.0], ["PHE 1199", 0.0], ["TRP 1200", 0.0], ["GLU 1201", 0.0], ["MET 1202", 0.0], ["SER 1203", 0.0], ["ARG 1204", 0.0], ["ALA 1205", 0.0], ["GLN 1206", 0.0], ["LEU 1207", 0.0], ["GLU 1208", 0.0], ["GLN 1209", 0.0], ["GLU 1210", 0.0], ["GLU 1211", 0.0], ["PHE 1212", 0.0], ["GLU 1213", 0.0], ["VAL 1214", 0.0], ["VAL 1215", 0.0], ["VAL 1216", 0.0], ["ILE 1217", 0.0], ["LEU 1218", 0.0], ["GLU 1219", 0.0], ["GLY 1220", 0.0], ["MET 1221", 0.0], ["VAL 1222", 0.0], ["GLU 1223", 0.0], ["ALA 1224", 0.0], ["THR 1225", 0.0], ["GLY 1226", 0.0], ["MET 1227", 0.0], ["THR 1228", 0.0], ["CYS 1229", 0.0], ["GLN 1230", 0.0], ["ALA 1231", 0.0], ["ARG 1232", 0.0], ["SER 1233", 0.0], ["SER 1234", 0.0], ["TYR 1235", 0.0], ["MET 1236", 0.0], ["ASP 1237", 0.0], ["THR 1238", 0.0], ["GLU 1239", 0.0], ["VAL 1240", 0.0], ["LEU 1241", 0.0], ["TRP 1242", 0.0], ["GLY 1243", 0.0], ["HIS 1244", 0.0], ["ARG 1245", 0.0], ["PHE 1246", 0.0], ["THR 1247", 0.0], ["PRO 1248", 0.0], ["VAL 1249", 0.0], ["LEU 1250", 0.0], ["THR 1251", 0.0], ["LEU 1252", 0.0], ["GLU 1253", 0.01], ["LYS 1254", 0.01], ["GLY 1255", 0.0], ["PHE 1256", 0.17], ["TYR 1257", 0.0], ["GLU 1258", 0.05], ["VAL 1259", 0.0], ["ASP 1260", 0.01], ["TYR 1261", 0.0], ["ASN 1262", 0.01], ["THR 1263", 0.0], ["PHE 1264", 0.0], ["HIS 1265", 0.0], ["ASP 1266", 0.0], ["THR 1267", 0.0], ["TYR 1268", 0.0], ["GLU 1269", 
0.0], ["THR 1270", 0.0], ["ASN 1271", 0.0], ["THR 1272", 0.0], ["PRO 1273", 0.0], ["SER 1274", 0.0], ["CYS 1275", 0.0], ["CYS 1276", 0.0], ["ALA 1277", 0.0], ["LYS 1278", 0.0], ["GLU 1279", 0.0], ["LEU 1280", 0.0], ["ALA 1281", 0.0], ["GLU 1282", 0.0], ["MET 1283", 0.0]], "POPS": [["ARG 0", 0.54], ["GLN 1", 0.01], ["ARG 2", 0.0], ["TYR 3", 0.0], ["MET 4", 0.0], ["GLU 5", 0.03], ["LYS 6", 0.01], ["THR 7", 0.12], ["GLY 8", 0.0], ["LYS 9", 1.04], ["CYS 10", 0.03], ["ASN 11", 0.07], ["VAL 12", 0.0], ["HIS 13", 1.01], ["HIS 14", 0.12], ["GLY 15", 1.23], ["ASN 16", 2.53], ["VAL 17", 0.73], ["GLN 18", 1.25], ["GLU 19", 0.29], ["THR 20", 1.88], ["TYR 21", 3.9], ["ARG 22", 3.15], ["TYR 23", 2.48], ["LEU 24", 3.75], ["SER 25", 2.75], ["ASP 26", 1.79], ["LEU 27", 2.77], ["PHE 28", 0.8], ["THR 29", 0.04], ["THR 30", 0.55], ["LEU 31", 0.95], ["VAL 32", 0.0], ["ASP 33", 0.29], ["LEU 34", 1.19], ["LYS 35", 4.02], ["TRP 36", 3.23], ["ARG 37", 4.22], ["PHE 38", 3.4], ["ASN 39", 0.02], ["LEU 40", 1.78], ["LEU 41", 3.11], ["VAL 42", 1.96], ["PHE 43", 1.08], ["THR 44", 1.44], ["MET 45", 3.83], ["VAL 46", 3.04], ["TYR 47", 0.12], ["THR 48", 1.63], ["ILE 49", 4.18], ["THR 50", 1.78], ["TRP 51", 0.01], ["LEU 52", 2.57], ["PHE 53", 4.33], ["PHE 54", 0.38], ["GLY 55", 0.0], ["PHE 56", 4.35], ["ILE 57", 1.06], ["TRP 58", 0.0], ["TRP 59", 2.63], ["LEU 60", 2.41], ["ILE 61", 0.53], ["ALA 62", 0.0], ["TYR 63", 2.86], ["VAL 64", 2.43], ["ARG 65", 2.67], ["GLY 66", 1.48], ["ASP 67", 0.66], ["LEU 68", 0.62], ["ASP 69", 1.42], ["HIS 70", 0.85], ["VAL 71", 0.1], ["GLY 72", 0.01], ["ASP 73", 0.01], ["GLN 74", 0.0], ["GLU 75", 0.16], ["TRP 76", 0.12], ["ILE 77", 0.34], ["PRO 78", 0.05], ["CYS 79", 0.04], ["VAL 80", 0.0], ["GLU 81", 0.0], ["ASN 82", 0.1], ["LEU 83", 0.03], ["SER 84", 1.49], ["GLY 85", 1.83], ["PHE 86", 2.55], ["VAL 87", 2.03], ["SER 88", 0.98], ["ALA 89", 0.0], ["PHE 90", 1.41], ["LEU 91", 0.08], ["PHE 92", 0.0], ["SER 93", 0.0], ["ILE 94", 0.0], ["GLU 95", 0.0], ["THR 96", 0.0], 
["GLU 97", 0.0], ["THR 98", 0.0], ["THR 99", 0.0], ["ILE 100", 0.0], ["GLY 101", 0.0], ["TYR 102", 0.0], ["GLY 103", 0.0], ["PHE 104", 0.0], ["ARG 105", 0.21], ["VAL 106", 0.0], ["ILE 107", 0.0], ["THR 108", 0.0], ["GLU 109", 0.2], ["LYS 110", 1.4], ["CYS 111", 0.93], ["PRO 112", 1.59], ["GLU 113", 1.35], ["GLY 114", 0.01], ["ILE 115", 0.2], ["ILE 116", 1.73], ["LEU 117", 0.95], ["LEU 118", 0.0], ["LEU 119", 0.58], ["VAL 120", 1.27], ["GLN 121", 0.18], ["ALA 122", 0.0], ["ILE 123", 0.05], ["LEU 124", 1.65], ["GLY 125", 0.08], ["SER 126", 0.0], ["ILE 127", 1.94], ["VAL 128", 1.4], ["ASN 129", 0.01], ["ALA 130", 0.0], ["PHE 131", 2.71], ["MET 132", 0.04], ["VAL 133", 0.0], ["GLY 134", 0.0], ["CYS 135", 0.02], ["MET 136", 0.03], ["PHE 137", 0.0], ["VAL 138", 0.0], ["LYS 139", 2.51], ["ILE 140", 1.96], ["SER 141", 1.86], ["GLN 142", 2.28], ["PRO 143", 2.05], ["LYS 144", 3.83], ["LYS 145", 3.75], ["ARG 146", 0.16], ["ALA 147", 0.05], ["GLU 148", 1.38], ["THR 149", 0.07], ["LEU 150", 0.0], ["MET 151", 0.03], ["PHE 152", 0.0], ["SER 153", 0.0], ["ASN 154", 0.0], ["ASN 155", 0.0], ["ALA 156", 0.0], ["VAL 157", 0.0], ["ILE 158", 0.0], ["SER 159", 0.0], ["MET 160", 0.0], ["ARG 161", 0.0], ["ASP 162", 0.0], ["GLU 163", 0.0], ["LYS 164", 0.0], ["LEU 165", 0.0], ["CYS 166", 0.0], ["LEU 167", 0.0], ["MET 168", 0.0], ["PHE 169", 0.0], ["ARG 170", 0.0], ["VAL 171", 0.0], ["GLY 172", 0.0], ["ASP 173", 0.0], ["LEU 174", 0.01], ["ARG 175", 1.72], ["ASN 176", 0.74], ["SER 177", 0.25], ["HIS 178", 0.01], ["ILE 179", 0.0], ["VAL 180", 0.0], ["GLU 181", 0.0], ["ALA 182", 0.0], ["SER 183", 0.0], ["ILE 184", 0.0], ["ARG 185", 0.0], ["ALA 186", 0.0], ["LYS 187", 0.0], ["LEU 188", 0.0], ["ILE 189", 0.0], ["LYS 190", 0.0], ["SER 191", 0.0], ["ARG 192", 0.0], ["GLN 193", 0.0], ["THR 194", 0.0], ["LYS 195", 0.0], ["GLU 196", 0.0], ["GLY 197", 0.0], ["GLU 198", 0.0], ["PHE 199", 0.0], ["ILE 200", 0.0], ["PRO 201", 0.0], ["LEU 202", 0.0], ["ASN 203", 0.0], ["GLN 204", 0.0], ["THR 205", 0.0], 
["ASP 206", 0.0], ["ILE 207", 0.0], ["ASN 208", 0.0], ["VAL 209", 0.0], ["GLY 210", 0.0], ["PHE 211", 0.0], ["ASP 212", 0.0], ["THR 213", 0.0], ["GLY 214", 0.0], ["ASP 215", 0.0], ["ASP 216", 0.0], ["ARG 217", 0.0], ["LEU 218", 0.0], ["PHE 219", 0.0], ["LEU 220", 0.0], ["VAL 221", 0.0], ["SER 222", 0.0], ["PRO 223", 0.0], ["LEU 224", 0.0], ["ILE 225", 0.0], ["ILE 226", 0.0], ["SER 227", 0.0], ["HIS 228", 0.0], ["GLU 229", 0.0], ["ILE 230", 0.0], ["ASN 231", 0.0], ["GLU 232", 0.0], ["LYS 233", 0.0], ["SER 234", 0.0], ["PRO 235", 0.0], ["PHE 236", 0.0], ["TRP 237", 0.0], ["GLU 238", 0.0], ["MET 239", 0.0], ["SER 240", 0.0], ["ARG 241", 0.0], ["ALA 242", 0.0], ["GLN 243", 0.0], ["LEU 244", 0.0], ["GLU 245", 0.0], ["GLN 246", 0.0], ["GLU 247", 0.0], ["GLU 248", 0.0], ["PHE 249", 0.0], ["GLU 250", 0.0], ["VAL 251", 0.0], ["VAL 252", 0.0], ["VAL 253", 0.0], ["ILE 254", 0.0], ["LEU 255", 0.0], ["GLU 256", 0.0], ["GLY 257", 0.0], ["MET 258", 0.0], ["VAL 259", 0.08], ["GLU 260", 0.01], ["ALA 261", 0.34], ["THR 262", 1.75], ["GLY 263", 0.0], ["MET 264", 0.01], ["THR 265", 0.0], ["CYS 266", 0.0], ["GLN 267", 0.0], ["ALA 268", 0.0], ["ARG 269", 0.03], ["SER 270", 0.0], ["SER 271", 0.0], ["TYR 272", 0.0], ["MET 273", 0.0], ["ASP 274", 0.0], ["THR 275", 0.0], ["GLU 276", 0.0], ["VAL 277", 0.0], ["LEU 278", 0.0], ["TRP 279", 0.0], ["GLY 280", 0.0], ["HIS 281", 0.0], ["ARG 282", 0.0], ["PHE 283", 0.0], ["THR 284", 0.0], ["PRO 285", 0.0], ["VAL 286", 0.0], ["LEU 287", 0.0], ["THR 288", 0.0], ["LEU 289", 0.0], ["GLU 290", 0.0], ["LYS 291", 0.0], ["GLY 292", 0.01], ["PHE 293", 0.74], ["TYR 294", 0.01], ["GLU 295", 0.12], ["VAL 296", 0.0], ["ASP 297", 0.11], ["TYR 298", 0.14], ["ASN 299", 0.13], ["THR 300", 0.0], ["PHE 301", 0.0], ["HIS 302", 0.1], ["ASP 303", 0.0], ["THR 304", 0.0], ["TYR 305", 0.0], ["GLU 306", 0.0], ["THR 307", 0.0], ["ASN 308", 0.0], ["THR 309", 0.0], ["PRO 310", 0.0], ["SER 311", 0.0], ["CYS 312", 0.0], ["CYS 313", 0.0], ["ALA 314", 0.0], ["LYS 315", 0.0], ["GLU 
316", 0.0], ["LEU 317", 0.0], ["ALA 318", 0.0], ["GLU 319", 0.0], ["MET 320", 0.0], ["ARG 321", 0.0], ["GLN 322", 0.0], ["ARG 323", 0.11], ["TYR 324", 0.0], ["MET 325", 0.0], ["GLU 326", 0.16], ["LYS 327", 0.1], ["THR 328", 0.35], ["GLY 329", 0.0], ["LYS 330", 1.68], ["CYS 331", 0.21], ["ASN 332", 0.21], ["VAL 333", 0.05], ["HIS 334", 1.96], ["HIS 335", 0.21], ["GLY 336", 0.72], ["ASN 337", 1.06], ["VAL 338", 1.44], ["GLN 339", 1.56], ["GLU 340", 0.94], ["THR 341", 1.96], ["TYR 342", 3.62], ["ARG 343", 1.69], ["TYR 344", 2.34], ["LEU 345", 3.4], ["SER 346", 2.2], ["ASP 347", 0.17], ["LEU 348", 2.01], ["PHE 349", 4.83], ["THR 350", 3.45], ["THR 351", 2.04], ["LEU 352", 2.38], ["VAL 353", 1.92], ["ASP 354", 0.83], ["LEU 355", 1.31], ["LYS 356", 4.12], ["TRP 357", 2.14], ["ARG 358", 4.42], ["PHE 359", 3.05], ["ASN 360", 0.06], ["LEU 361", 2.62], ["LEU 362", 2.83], ["VAL 363", 1.84], ["PHE 364", 0.17], ["THR 365", 0.79], ["MET 366", 2.25], ["VAL 367", 2.63], ["TYR 368", 0.95], ["THR 369", 0.42], ["ILE 370", 3.2], ["THR 371", 2.18], ["TRP 372", 0.0], ["LEU 373", 1.3], ["PHE 374", 5.58], ["PHE 375", 2.18], ["GLY 376", 0.0], ["PHE 377", 3.35], ["ILE 378", 1.84], ["TRP 379", 0.0], ["TRP 380", 1.89], ["LEU 381", 2.16], ["ILE 382", 0.49], ["ALA 383", 0.07], ["TYR 384", 2.42], ["VAL 385", 2.12], ["ARG 386", 2.89], ["GLY 387", 1.12], ["ASP 388", 0.22], ["LEU 389", 0.7], ["ASP 390", 0.84], ["HIS 391", 1.2], ["VAL 392", 0.05], ["GLY 393", 0.0], ["ASP 394", 0.1], ["GLN 395", 0.1], ["GLU 396", 0.35], ["TRP 397", 1.31], ["ILE 398", 0.19], ["PRO 399", 0.01], ["CYS 400", 0.11], ["VAL 401", 0.0], ["GLU 402", 0.0], ["ASN 403", 0.03], ["LEU 404", 0.03], ["SER 405", 0.74], ["GLY 406", 1.13], ["PHE 407", 1.66], ["VAL 408", 1.14], ["SER 409", 0.27], ["ALA 410", 0.0], ["PHE 411", 0.41], ["LEU 412", 0.02], ["PHE 413", 0.0], ["SER 414", 0.0], ["ILE 415", 0.0], ["GLU 416", 0.0], ["THR 417", 0.0], ["GLU 418", 0.0], ["THR 419", 0.0], ["THR 420", 0.0], ["ILE 421", 0.0], ["GLY 422", 0.0], ["TYR 
423", 0.0], ["GLY 424", 0.0], ["PHE 425", 0.0], ["ARG 426", 0.01], ["VAL 427", 0.0], ["ILE 428", 0.0], ["THR 429", 0.01], ["GLU 430", 0.26], ["LYS 431", 2.84], ["CYS 432", 0.94], ["PRO 433", 2.21], ["GLU 434", 1.64], ["GLY 435", 0.02], ["ILE 436", 0.09], ["ILE 437", 2.01], ["LEU 438", 0.77], ["LEU 439", 0.0], ["LEU 440", 0.57], ["VAL 441", 2.4], ["GLN 442", 0.0], ["ALA 443", 0.02], ["ILE 444", 0.83], ["LEU 445", 2.82], ["GLY 446", 0.04], ["SER 447", 0.0], ["ILE 448", 0.46], ["VAL 449", 2.27], ["ASN 450", 0.0], ["ALA 451", 0.0], ["PHE 452", 2.95], ["MET 453", 1.92], ["VAL 454", 0.0], ["GLY 455", 0.09], ["CYS 456", 1.87], ["MET 457", 0.0], ["PHE 458", 0.0], ["VAL 459", 0.4], ["LYS 460", 1.91], ["ILE 461", 0.0], ["SER 462", 0.0], ["GLN 463", 0.39], ["PRO 464", 0.06], ["LYS 465", 1.6], ["LYS 466", 1.92], ["ARG 467", 0.0], ["ALA 468", 0.0], ["GLU 469", 1.77], ["THR 470", 0.0], ["LEU 471", 0.0], ["MET 472", 0.01], ["PHE 473", 0.0], ["SER 474", 0.0], ["ASN 475", 0.02], ["ASN 476", 0.0], ["ALA 477", 0.0], ["VAL 478", 0.0], ["ILE 479", 0.0], ["SER 480", 0.0], ["MET 481", 0.0], ["ARG 482", 0.0], ["ASP 483", 0.0], ["GLU 484", 0.0], ["LYS 485", 0.0], ["LEU 486", 0.0], ["CYS 487", 0.0], ["LEU 488", 0.0], ["MET 489", 0.0], ["PHE 490", 0.0], ["ARG 491", 0.0], ["VAL 492", 0.0], ["GLY 493", 0.0], ["ASP 494", 0.0], ["LEU 495", 0.3], ["ARG 496", 1.26], ["ASN 497", 0.7], ["SER 498", 0.11], ["HIS 499", 0.01], ["ILE 500", 0.0], ["VAL 501", 0.0], ["GLU 502", 0.0], ["ALA 503", 0.0], ["SER 504", 0.0], ["ILE 505", 0.0], ["ARG 506", 0.0], ["ALA 507", 0.0], ["LYS 508", 0.0], ["LEU 509", 0.0], ["ILE 510", 0.0], ["LYS 511", 0.0], ["SER 512", 0.0], ["ARG 513", 0.0], ["GLN 514", 0.0], ["THR 515", 0.0], ["LYS 516", 0.0], ["GLU 517", 0.0], ["GLY 518", 0.0], ["GLU 519", 0.0], ["PHE 520", 0.0], ["ILE 521", 0.0], ["PRO 522", 0.0], ["LEU 523", 0.0], ["ASN 524", 0.0], ["GLN 525", 0.0], ["THR 526", 0.0], ["ASP 527", 0.0], ["ILE 528", 0.0], ["ASN 529", 0.0], ["VAL 530", 0.0], ["GLY 531", 0.0], ["PHE 532", 
0.0], ["ASP 533", 0.0], ["THR 534", 0.0], ["GLY 535", 0.0], ["ASP 536", 0.0], ["ASP 537", 0.0], ["ARG 538", 0.0], ["LEU 539", 0.0], ["PHE 540", 0.0], ["LEU 541", 0.0], ["VAL 542", 0.0], ["SER 543", 0.0], ["PRO 544", 0.0], ["LEU 545", 0.0], ["ILE 546", 0.0], ["ILE 547", 0.0], ["SER 548", 0.0], ["HIS 549", 0.0], ["GLU 550", 0.0], ["ILE 551", 0.0], ["ASN 552", 0.0], ["GLU 553", 0.0], ["LYS 554", 0.0], ["SER 555", 0.0], ["PRO 556", 0.0], ["PHE 557", 0.0], ["TRP 558", 0.0], ["GLU 559", 0.0], ["MET 560", 0.0], ["SER 561", 0.0], ["ARG 562", 0.0], ["ALA 563", 0.0], ["GLN 564", 0.0], ["LEU 565", 0.0], ["GLU 566", 0.0], ["GLN 567", 0.0], ["GLU 568", 0.0], ["GLU 569", 0.0], ["PHE 570", 0.0], ["GLU 571", 0.0], ["VAL 572", 0.0], ["VAL 573", 0.0], ["VAL 574", 0.0], ["ILE 575", 0.0], ["LEU 576", 0.0], ["GLU 577", 0.0], ["GLY 578", 0.0], ["MET 579", 0.0], ["VAL 580", 0.0], ["GLU 581", 0.01], ["ALA 582", 0.0], ["THR 583", 0.0], ["GLY 584", 0.0], ["MET 585", 0.0], ["THR 586", 0.0], ["CYS 587", 0.0], ["GLN 588", 0.0], ["ALA 589", 0.0], ["ARG 590", 0.02], ["SER 591", 0.0], ["SER 592", 0.0], ["TYR 593", 0.0], ["MET 594", 0.0], ["ASP 595", 0.0], ["THR 596", 0.0], ["GLU 597", 0.0], ["VAL 598", 0.0], ["LEU 599", 0.0], ["TRP 600", 0.0], ["GLY 601", 0.0], ["HIS 602", 0.0], ["ARG 603", 0.0], ["PHE 604", 0.0], ["THR 605", 0.0], ["PRO 606", 0.0], ["VAL 607", 0.0], ["LEU 608", 0.0], ["THR 609", 0.0], ["LEU 610", 0.0], ["GLU 611", 0.24], ["LYS 612", 1.9], ["GLY 613", 0.3], ["PHE 614", 1.55], ["TYR 615", 0.0], ["GLU 616", 0.24], ["VAL 617", 0.03], ["ASP 618", 0.42], ["TYR 619", 0.48], ["ASN 620", 0.73], ["THR 621", 0.01], ["PHE 622", 0.01], ["HIS 623", 0.66], ["ASP 624", 0.05], ["THR 625", 0.0], ["TYR 626", 0.01], ["GLU 627", 0.0], ["THR 628", 0.0], ["ASN 629", 0.0], ["THR 630", 0.0], ["PRO 631", 0.0], ["SER 632", 0.0], ["CYS 633", 0.0], ["CYS 634", 0.0], ["ALA 635", 0.0], ["LYS 636", 0.0], ["GLU 637", 0.0], ["LEU 638", 0.0], ["ALA 639", 0.0], ["GLU 640", 0.0], ["MET 641", 0.0], ["ARG 642", 0.0], 
["GLN 643", 0.01], ["ARG 644", 0.05], ["TYR 645", 0.0], ["MET 646", 0.0], ["GLU 647", 0.17], ["LYS 648", 0.08], ["THR 649", 0.26], ["GLY 650", 0.0], ["LYS 651", 1.58], ["CYS 652", 0.17], ["ASN 653", 0.23], ["VAL 654", 0.17], ["HIS 655", 2.58], ["HIS 656", 0.91], ["GLY 657", 0.84], ["ASN 658", 1.98], ["VAL 659", 1.31], ["GLN 660", 0.88], ["GLU 661", 1.39], ["THR 662", 1.91], ["TYR 663", 4.15], ["ARG 664", 1.12], ["TYR 665", 1.49], ["LEU 666", 3.29], ["SER 667", 2.56], ["ASP 668", 0.41], ["LEU 669", 2.51], ["PHE 670", 0.02], ["THR 671", 0.0], ["THR 672", 0.95], ["LEU 673", 1.02], ["VAL 674", 0.06], ["ASP 675", 1.53], ["LEU 676", 1.33], ["LYS 677", 4.03], ["TRP 678", 2.56], ["ARG 679", 4.53], ["PHE 680", 3.43], ["ASN 681", 0.2], ["LEU 682", 3.11], ["LEU 683", 3.23], ["VAL 684", 1.68], ["PHE 685", 0.17], ["THR 686", 0.46], ["MET 687", 3.05], ["VAL 688", 1.98], ["TYR 689", 0.02], ["THR 690", 1.07], ["ILE 691", 3.59], ["THR 692", 1.5], ["TRP 693", 0.01], ["LEU 694", 2.28], ["PHE 695", 4.86], ["PHE 696", 1.62], ["GLY 697", 0.0], ["PHE 698", 3.64], ["ILE 699", 2.11], ["TRP 700", 0.0], ["TRP 701", 2.03], ["LEU 702", 1.95], ["ILE 703", 0.63], ["ALA 704", 0.0], ["TYR 705", 2.24], ["VAL 706", 1.88], ["ARG 707", 1.87], ["GLY 708", 1.15], ["ASP 709", 0.29], ["LEU 710", 0.77], ["ASP 711", 0.79], ["HIS 712", 0.23], ["VAL 713", 0.14], ["GLY 714", 0.12], ["ASP 715", 0.22], ["GLN 716", 0.42], ["GLU 717", 0.42], ["TRP 718", 0.96], ["ILE 719", 0.25], ["PRO 720", 0.04], ["CYS 721", 0.1], ["VAL 722", 0.0], ["GLU 723", 0.0], ["ASN 724", 0.25], ["LEU 725", 0.18], ["SER 726", 1.28], ["GLY 727", 1.88], ["PHE 728", 2.32], ["VAL 729", 1.93], ["SER 730", 0.84], ["ALA 731", 0.0], ["PHE 732", 1.46], ["LEU 733", 0.24], ["PHE 734", 0.0], ["SER 735", 0.0], ["ILE 736", 0.03], ["GLU 737", 0.0], ["THR 738", 0.0], ["GLU 739", 0.0], ["THR 740", 0.0], ["THR 741", 0.0], ["ILE 742", 0.0], ["GLY 743", 0.0], ["TYR 744", 0.0], ["GLY 745", 0.0], ["PHE 746", 0.0], ["ARG 747", 0.02], ["VAL 748", 0.0], ["ILE 749", 
0.0], ["THR 750", 0.04], ["GLU 751", 0.02], ["LYS 752", 0.85], ["CYS 753", 0.44], ["PRO 754", 0.62], ["GLU 755", 1.23], ["GLY 756", 0.17], ["ILE 757", 0.01], ["ILE 758", 0.76], ["LEU 759", 1.18], ["LEU 760", 0.0], ["LEU 761", 0.23], ["VAL 762", 1.42], ["GLN 763", 0.01], ["ALA 764", 0.0], ["ILE 765", 0.07], ["LEU 766", 2.23], ["GLY 767", 0.41], ["SER 768", 0.0], ["ILE 769", 1.75], ["VAL 770", 1.73], ["ASN 771", 0.0], ["ALA 772", 0.0], ["PHE 773", 2.27], ["MET 774", 0.67], ["VAL 775", 0.0], ["GLY 776", 0.0], ["CYS 777", 0.0], ["MET 778", 0.07], ["PHE 779", 0.0], ["VAL 780", 0.0], ["LYS 781", 2.35], ["ILE 782", 1.66], ["SER 783", 0.98], ["GLN 784", 1.49], ["PRO 785", 1.33], ["LYS 786", 2.34], ["LYS 787", 3.38], ["ARG 788", 0.35], ["ALA 789", 0.01], ["GLU 790", 0.76], ["THR 791", 0.05], ["LEU 792", 0.0], ["MET 793", 0.05], ["PHE 794", 0.0], ["SER 795", 0.0], ["ASN 796", 0.0], ["ASN 797", 0.0], ["ALA 798", 0.0], ["VAL 799", 0.0], ["ILE 800", 0.0], ["SER 801", 0.0], ["MET 802", 0.0], ["ARG 803", 0.0], ["ASP 804", 0.0], ["GLU 805", 0.0], ["LYS 806", 0.0], ["LEU 807", 0.0], ["CYS 808", 0.0], ["LEU 809", 0.0], ["MET 810", 0.0], ["PHE 811", 0.0], ["ARG 812", 0.0], ["VAL 813", 0.0], ["GLY 814", 0.0], ["ASP 815", 0.0], ["LEU 816", 0.04], ["ARG 817", 0.75], ["ASN 818", 0.09], ["SER 819", 0.0], ["HIS 820", 0.01], ["ILE 821", 0.0], ["VAL 822", 0.0], ["GLU 823", 0.0], ["ALA 824", 0.0], ["SER 825", 0.0], ["ILE 826", 0.0], ["ARG 827", 0.0], ["ALA 828", 0.0], ["LYS 829", 0.0], ["LEU 830", 0.0], ["ILE 831", 0.0], ["LYS 832", 0.0], ["SER 833", 0.0], ["ARG 834", 0.0], ["GLN 835", 0.0], ["THR 836", 0.0], ["LYS 837", 0.0], ["GLU 838", 0.0], ["GLY 839", 0.0], ["GLU 840", 0.0], ["PHE 841", 0.0], ["ILE 842", 0.0], ["PRO 843", 0.0], ["LEU 844", 0.0], ["ASN 845", 0.0], ["GLN 846", 0.0], ["THR 847", 0.0], ["ASP 848", 0.0], ["ILE 849", 0.0], ["ASN 850", 0.0], ["VAL 851", 0.0], ["GLY 852", 0.0], ["PHE 853", 0.0], ["ASP 854", 0.0], ["THR 855", 0.0], ["GLY 856", 0.0], ["ASP 857", 0.0], ["ASP 858", 
0.0], ["ARG 859", 0.0], ["LEU 860", 0.0], ["PHE 861", 0.0], ["LEU 862", 0.0], ["VAL 863", 0.0], ["SER 864", 0.0], ["PRO 865", 0.0], ["LEU 866", 0.0], ["ILE 867", 0.0], ["ILE 868", 0.0], ["SER 869", 0.0], ["HIS 870", 0.0], ["GLU 871", 0.0], ["ILE 872", 0.0], ["ASN 873", 0.0], ["GLU 874", 0.0], ["LYS 875", 0.0], ["SER 876", 0.0], ["PRO 877", 0.0], ["PHE 878", 0.0], ["TRP 879", 0.0], ["GLU 880", 0.0], ["MET 881", 0.0], ["SER 882", 0.0], ["ARG 883", 0.0], ["ALA 884", 0.0], ["GLN 885", 0.0], ["LEU 886", 0.0], ["GLU 887", 0.0], ["GLN 888", 0.0], ["GLU 889", 0.0], ["GLU 890", 0.0], ["PHE 891", 0.0], ["GLU 892", 0.0], ["VAL 893", 0.0], ["VAL 894", 0.0], ["VAL 895", 0.0], ["ILE 896", 0.0], ["LEU 897", 0.0], ["GLU 898", 0.0], ["GLY 899", 0.0], ["MET 900", 0.0], ["VAL 901", 0.0], ["GLU 902", 0.0], ["ALA 903", 0.0], ["THR 904", 0.0], ["GLY 905", 0.0], ["MET 906", 0.0], ["THR 907", 0.0], ["CYS 908", 0.0], ["GLN 909", 0.0], ["ALA 910", 0.0], ["ARG 911", 0.0], ["SER 912", 0.0], ["SER 913", 0.0], ["TYR 914", 0.0], ["MET 915", 0.0], ["ASP 916", 0.0], ["THR 917", 0.0], ["GLU 918", 0.0], ["VAL 919", 0.0], ["LEU 920", 0.0], ["TRP 921", 0.0], ["GLY 922", 0.0], ["HIS 923", 0.0], ["ARG 924", 0.0], ["PHE 925", 0.0], ["THR 926", 0.0], ["PRO 927", 0.0], ["VAL 928", 0.0], ["LEU 929", 0.0], ["THR 930", 0.0], ["LEU 931", 0.0], ["GLU 932", 0.0], ["LYS 933", 0.0], ["GLY 934", 0.0], ["PHE 935", 0.0], ["TYR 936", 0.0], ["GLU 937", 0.0], ["VAL 938", 0.0], ["ASP 939", 0.01], ["TYR 940", 0.01], ["ASN 941", 0.04], ["THR 942", 0.0], ["PHE 943", 0.0], ["HIS 944", 0.0], ["ASP 945", 0.0], ["THR 946", 0.0], ["TYR 947", 0.0], ["GLU 948", 0.0], ["THR 949", 0.0], ["ASN 950", 0.0], ["THR 951", 0.0], ["PRO 952", 0.0], ["SER 953", 0.0], ["CYS 954", 0.0], ["CYS 955", 0.0], ["ALA 956", 0.0], ["LYS 957", 0.0], ["GLU 958", 0.0], ["LEU 959", 0.0], ["ALA 960", 0.0], ["GLU 961", 0.0], ["MET 962", 0.0], ["ARG 963", 0.0], ["GLN 964", 0.0], ["ARG 965", 0.0], ["TYR 966", 0.0], ["MET 967", 0.0], ["GLU 968", 0.0], ["LYS 
969", 0.0], ["THR 970", 0.06], ["GLY 971", 0.0], ["LYS 972", 0.39], ["CYS 973", 0.0], ["ASN 974", 0.0], ["VAL 975", 0.0], ["HIS 976", 0.05], ["HIS 977", 0.04], ["GLY 978", 0.05], ["ASN 979", 0.64], ["VAL 980", 0.31], ["GLN 981", 1.15], ["GLU 982", 0.77], ["THR 983", 0.9], ["TYR 984", 3.05], ["ARG 985", 2.22], ["TYR 986", 3.21], ["LEU 987", 4.01], ["SER 988", 3.59], ["ASP 989", 0.71], ["LEU 990", 2.7], ["PHE 991", 1.73], ["THR 992", 0.64], ["THR 993", 0.78], ["LEU 994", 0.97], ["VAL 995", 0.12], ["ASP 996", 0.81], ["LEU 997", 1.47], ["LYS 998", 4.39], ["TRP 999", 3.57], ["ARG 1000", 3.78], ["PHE 1001", 3.03], ["ASN 1002", 0.04], ["LEU 1003", 2.8], ["LEU 1004", 3.21], ["VAL 1005", 1.69], ["PHE 1006", 0.23], ["THR 1007", 0.98], ["MET 1008", 3.45], ["VAL 1009", 1.82], ["TYR 1010", 0.18], ["THR 1011", 1.24], ["ILE 1012", 3.77], ["THR 1013", 1.63], ["TRP 1014", 0.02], ["LEU 1015", 2.45], ["PHE 1016", 5.7], ["PHE 1017", 2.0], ["GLY 1018", 0.0], ["PHE 1019", 4.56], ["ILE 1020", 2.67], ["TRP 1021", 0.01], ["TRP 1022", 2.62], ["LEU 1023", 2.88], ["ILE 1024", 0.97], ["ALA 1025", 0.02], ["TYR 1026", 2.8], ["VAL 1027", 2.36], ["ARG 1028", 2.76], ["GLY 1029", 1.64], ["ASP 1030", 0.51], ["LEU 1031", 0.61], ["ASP 1032", 0.82], ["HIS 1033", 0.03], ["VAL 1034", 0.03], ["GLY 1035", 0.04], ["ASP 1036", 0.04], ["GLN 1037", 0.03], ["GLU 1038", 0.05], ["TRP 1039", 0.29], ["ILE 1040", 0.82], ["PRO 1041", 0.27], ["CYS 1042", 0.08], ["VAL 1043", 0.0], ["GLU 1044", 0.0], ["ASN 1045", 0.05], ["LEU 1046", 0.03], ["SER 1047", 0.93], ["GLY 1048", 1.85], ["PHE 1049", 2.54], ["VAL 1050", 1.9], ["SER 1051", 0.73], ["ALA 1052", 0.0], ["PHE 1053", 1.15], ["LEU 1054", 0.32], ["PHE 1055", 0.0], ["SER 1056", 0.0], ["ILE 1057", 0.0], ["GLU 1058", 0.0], ["THR 1059", 0.0], ["GLU 1060", 0.0], ["THR 1061", 0.0], ["THR 1062", 0.0], ["ILE 1063", 0.0], ["GLY 1064", 0.0], ["TYR 1065", 0.0], ["GLY 1066", 0.03], ["PHE 1067", 0.09], ["ARG 1068", 0.14], ["VAL 1069", 0.0], ["ILE 1070", 0.0], ["THR 1071", 0.05], ["GLU 
1072", 0.08], ["LYS 1073", 1.78], ["CYS 1074", 1.06], ["PRO 1075", 2.14], ["GLU 1076", 1.75], ["GLY 1077", 0.02], ["ILE 1078", 0.15], ["ILE 1079", 2.23], ["LEU 1080", 1.6], ["LEU 1081", 0.01], ["LEU 1082", 0.21], ["VAL 1083", 1.99], ["GLN 1084", 0.03], ["ALA 1085", 0.0], ["ILE 1086", 0.65], ["LEU 1087", 2.65], ["GLY 1088", 0.0], ["SER 1089", 0.0], ["ILE 1090", 1.12], ["VAL 1091", 1.11], ["ASN 1092", 0.0], ["ALA 1093", 0.01], ["PHE 1094", 1.81], ["MET 1095", 0.02], ["VAL 1096", 0.0], ["GLY 1097", 0.0], ["CYS 1098", 0.01], ["MET 1099", 1.25], ["PHE 1100", 0.0], ["VAL 1101", 0.0], ["LYS 1102", 2.16], ["ILE 1103", 1.86], ["SER 1104", 0.26], ["GLN 1105", 1.76], ["PRO 1106", 0.2], ["LYS 1107", 1.24], ["LYS 1108", 2.69], ["ARG 1109", 0.03], ["ALA 1110", 0.04], ["GLU 1111", 0.48], ["THR 1112", 0.01], ["LEU 1113", 0.0], ["MET 1114", 0.0], ["PHE 1115", 0.0], ["SER 1116", 0.0], ["ASN 1117", 0.0], ["ASN 1118", 0.0], ["ALA 1119", 0.0], ["VAL 1120", 0.0], ["ILE 1121", 0.0], ["SER 1122", 0.0], ["MET 1123", 0.0], ["ARG 1124", 0.0], ["ASP 1125", 0.0], ["GLU 1126", 0.0], ["LYS 1127", 0.0], ["LEU 1128", 0.0], ["CYS 1129", 0.0], ["LEU 1130", 0.0], ["MET 1131", 0.0], ["PHE 1132", 0.0], ["ARG 1133", 0.0], ["VAL 1134", 0.0], ["GLY 1135", 0.0], ["ASP 1136", 0.0], ["LEU 1137", 0.04], ["ARG 1138", 1.56], ["ASN 1139", 0.38], ["SER 1140", 0.02], ["HIS 1141", 0.1], ["ILE 1142", 0.0], ["VAL 1143", 0.0], ["GLU 1144", 0.0], ["ALA 1145", 0.0], ["SER 1146", 0.0], ["ILE 1147", 0.0], ["ARG 1148", 0.0], ["ALA 1149", 0.0], ["LYS 1150", 0.0], ["LEU 1151", 0.0], ["ILE 1152", 0.0], ["LYS 1153", 0.0], ["SER 1154", 0.0], ["ARG 1155", 0.0], ["GLN 1156", 0.0], ["THR 1157", 0.0], ["LYS 1158", 0.0], ["GLU 1159", 0.0], ["GLY 1160", 0.0], ["GLU 1161", 0.0], ["PHE 1162", 0.0], ["ILE 1163", 0.0], ["PRO 1164", 0.0], ["LEU 1165", 0.0], ["ASN 1166", 0.0], ["GLN 1167", 0.0], ["THR 1168", 0.0], ["ASP 1169", 0.0], ["ILE 1170", 0.0], ["ASN 1171", 0.0], ["VAL 1172", 0.0], ["GLY 1173", 0.0], ["PHE 1174", 0.0], ["ASP 1175", 
0.0], ["THR 1176", 0.0], ["GLY 1177", 0.0], ["ASP 1178", 0.0], ["ASP 1179", 0.0], ["ARG 1180", 0.0], ["LEU 1181", 0.0], ["PHE 1182", 0.0], ["LEU 1183", 0.0], ["VAL 1184", 0.0], ["SER 1185", 0.0], ["PRO 1186", 0.0], ["LEU 1187", 0.0], ["ILE 1188", 0.0], ["ILE 1189", 0.0], ["SER 1190", 0.0], ["HIS 1191", 0.0], ["GLU 1192", 0.0], ["ILE 1193", 0.0], ["ASN 1194", 0.0], ["GLU 1195", 0.0], ["LYS 1196", 0.0], ["SER 1197", 0.0], ["PRO 1198", 0.0], ["PHE 1199", 0.0], ["TRP 1200", 0.0], ["GLU 1201", 0.0], ["MET 1202", 0.0], ["SER 1203", 0.0], ["ARG 1204", 0.0], ["ALA 1205", 0.0], ["GLN 1206", 0.0], ["LEU 1207", 0.0], ["GLU 1208", 0.0], ["GLN 1209", 0.0], ["GLU 1210", 0.0], ["GLU 1211", 0.0], ["PHE 1212", 0.0], ["GLU 1213", 0.0], ["VAL 1214", 0.0], ["VAL 1215", 0.0], ["VAL 1216", 0.0], ["ILE 1217", 0.0], ["LEU 1218", 0.0], ["GLU 1219", 0.0], ["GLY 1220", 0.0], ["MET 1221", 0.0], ["VAL 1222", 0.0], ["GLU 1223", 0.01], ["ALA 1224", 0.02], ["THR 1225", 0.0], ["GLY 1226", 0.0], ["MET 1227", 0.0], ["THR 1228", 0.0], ["CYS 1229", 0.0], ["GLN 1230", 0.0], ["ALA 1231", 0.0], ["ARG 1232", 0.0], ["SER 1233", 0.0], ["SER 1234", 0.0], ["TYR 1235", 0.0], ["MET 1236", 0.0], ["ASP 1237", 0.0], ["THR 1238", 0.0], ["GLU 1239", 0.0], ["VAL 1240", 0.0], ["LEU 1241", 0.0], ["TRP 1242", 0.0], ["GLY 1243", 0.0], ["HIS 1244", 0.0], ["ARG 1245", 0.0], ["PHE 1246", 0.0], ["THR 1247", 0.0], ["PRO 1248", 0.0], ["VAL 1249", 0.0], ["LEU 1250", 0.0], ["THR 1251", 0.0], ["LEU 1252", 0.0], ["GLU 1253", 0.0], ["LYS 1254", 0.05], ["GLY 1255", 0.01], ["PHE 1256", 0.44], ["TYR 1257", 0.0], ["GLU 1258", 0.07], ["VAL 1259", 0.0], ["ASP 1260", 0.02], ["TYR 1261", 0.02], ["ASN 1262", 0.03], ["THR 1263", 0.0], ["PHE 1264", 0.0], ["HIS 1265", 0.0], ["ASP 1266", 0.0], ["THR 1267", 0.0], ["TYR 1268", 0.0], ["GLU 1269", 0.0], ["THR 1270", 0.0], ["ASN 1271", 0.0], ["THR 1272", 0.0], ["PRO 1273", 0.0], ["SER 1274", 0.0], ["CYS 1275", 0.0], ["CYS 1276", 0.0], ["ALA 1277", 0.0], ["LYS 1278", 0.0], ["GLU 1279", 0.0], ["LEU 
1280", 0.0], ["ALA 1281", 0.0], ["GLU 1282", 0.0], ["MET 1283", 0.0]]}} \ No newline at end of file diff --git a/prolint2/server/home.tpl b/prolint2/server/home.tpl deleted file mode 100644 index 0e1fb07..0000000 --- a/prolint2/server/home.tpl +++ /dev/null @@ -1,3 +0,0 @@ -%#template for the form for a new task -

WebApps List

- \ No newline at end of file diff --git a/prolint2/server/index.html b/prolint2/server/index.html index f76f476..3212d0d 100644 --- a/prolint2/server/index.html +++ b/prolint2/server/index.html @@ -50,6 +50,19 @@

ProLint2 Dashboard

+
+
+ +
+
+
diff --git a/prolint2/server/server.py b/prolint2/server/server.py index b9a285f..25dab1a 100644 --- a/prolint2/server/server.py +++ b/prolint2/server/server.py @@ -1,424 +1,410 @@ -from collections import Counter -from prolint2.interactive_sel import interactive_selection import os import ast -import json - -from bottle import route, run, template, debug, static_file, request -from prolint2.contacts import SerialDistances +from io import StringIO +from collections import Counter import MDAnalysis as mda -from prolint2.prolint2 import PL2 -from io import StringIO +from bottle import Bottle, redirect, static_file -from .chord_utils import contact_chord +from prolint2.core.universe import Universe +from prolint2.server.chord_utils import contact_chord +from prolint2.interactive_sel import interactive_selection +from prolint2.computers.payload import ServerPayload +from prolint2.computers.distances import SerialDistances SERVER_PATH = os.path.abspath(os.path.dirname(__file__)) -BACKEND_DATA = None -TS = None -ARGS = None -data = None -data_loaded = False - - -def sort_lipids(ts): +class ProLintDashboard: """ - Sort lipid contacts according to their contact frequency, all the while keeping track - of residue IDs, and number of contacts with each residue. - - Returns: - t (dict): Stores lipid IDs and their contact frequency, sorted in descending order - g (dict): For each lipid ID, stores the residues in contact and the corresponding - frequency. + A dashboard for ProLint2. It is a fully functional web application that can be used to + visualize the results of the ProLint2 analysis. """ - - def sort_tuple(tup): - tup.sort(key=lambda x: x[1], reverse=True) - return tup - - # TODO: - # top lipid number should be put in the config. 
- contact_threshold = ts.n_frames * 0.05 - - # initialize dictionary to store values: - t = {k: {} for k in ts.database_unique} - g = {} - for ix, (residue, lipid_contacts) in enumerate(ts.contacts.contacts.items()): - for lipid, contact_counter in lipid_contacts.items(): - top10_counter = contact_counter.most_common() - for (lipid_id, lipid_counter) in top10_counter: - # Exclude short-lived contacts - if lipid_counter <= contact_threshold: + def __init__(self, port=8351, debug_bool=False, reloader=False): + self.backend_data = None + self.ts = None + self.contacts = None + self.payload = None + self.args = None + self.data = None + self.data_loaded = False + self.port = port + self.debug_bool = debug_bool + self.reloader = reloader + self.response = None + self.app = Bottle() + self.setup_routes() + + def setup_routes(self): + """ + Setup the routes for the dashboard. + """ + self.app.route("/", method="GET", callback=self.redirect_to_app) + self.app.route("/app", method="GET", callback=self.serve_app) + self.app.route("/static/", method="GET", callback=self.server_static) + self.app.route("/data/", method="GET", callback=self.serve_data) + self.app.route("/pdb/", method="GET", callback=self.serve_pdb) + self.app.route("/network/", method="GET", callback=self.serve_network) + self.app.route("/tabledata/", method="GET", callback=self.serve_table_data) + self.app.route("/toplipids/", method="GET", callback=self.serve_top_lipids) + self.app.route("/distance/", method="GET", callback=self.serve_distance_array) + self.app.route("/metric/", method="GET", callback=self.update_metric) + + def serve_app(self): + """ + Serve the main application. + """ + return static_file("index.html", root=SERVER_PATH) + + def server_static(self, filepath): + """ + Serve static files. + """ + return static_file(filepath, root=os.path.join(SERVER_PATH, "static")) + + def redirect_to_app(self): + """ + Redirect to the main application. 
+ """ + redirect("/app") + + def serve_pdb(self, metadata): + """ + Serve the PDB file to the client for use with Mol*. + """ + u = mda.Universe(self.args.structure, self.args.trajectory) + protein = u.select_atoms("protein") + pstream = mda.lib.util.NamedStream(StringIO(), "dummy.pdb") + with mda.Writer(pstream, format="PDB") as w: + w.write(protein) + + return pstream.read() + + def get_gantt_app_data(self, g, lipid_id, residues_to_show=15, intervals_to_filter_out=10): + """ + Get the data for the Gantt chart in the application. + + Args: + g (dict): For each lipid ID, stores the residues in contact and the corresponding + frequency. + lipid_id (str): The lipid ID to use. + residues_to_show (int): The number of residues to show in the Gantt chart. + intervals_to_filter_out (int): The number of frames to filter out. + + Returns: + gantt_data (list): A list of dictionaries containing the data for the Gantt chart. + categories (list): A list of residue IDs. + """ + + gantt_data = [] + for res, _ in g[lipid_id][:residues_to_show]: + frame_numbers = self.contacts.contact_frames[res][lipid_id] + frame_intervals = self.get_frame_contact_intervals(frame_numbers) + for start, end in frame_intervals: + if end - start < intervals_to_filter_out: continue - if lipid_id in t[lipid]: - t[lipid][lipid_id] += lipid_counter - g[lipid_id].append((residue, lipid_counter)) - else: - t[lipid][lipid_id] = lipid_counter - g[lipid_id] = [(residue, lipid_counter)] - - for lipid, values in t.items(): - t[lipid] = Counter(values).most_common() - - # for lipid, values in g.items(): - for lipid_id, vals in g.items(): - g[lipid_id] = sort_tuple(vals) - - return t, g - - -def get_frame_contact_intervals(frames, tolerance=6): - """ - Get frame ranges - """ - ranges_collect = [] - range_start = 0 - for ix, el in enumerate(frames): - if ix == 0: - range_start = el - continue - - prev_el = frames[ix - 1] - if not el - tolerance <= prev_el: - ranges_collect.append((range_start, prev_el)) - 
range_start = el - if ix == len(frames) - 1: - ranges_collect.append((range_start, el)) - return ranges_collect - - -def get_gantt_app_data(g, lipid_id, residues_to_show=15, intervals_to_filter_out=10): - gantt_data = [] - for res, _ in g[lipid_id][:residues_to_show]: - frame_numbers = TS.contacts.contact_frames[f"{res},{lipid_id}"] - frame_intervals = get_frame_contact_intervals(frame_numbers) - for start, end in frame_intervals: - if end - start < intervals_to_filter_out: + gantt_data.append( + { + "category": res, + "startFrame": start, + "endFrame": end, + "lipid_id": lipid_id, + } + ) + + # TODO: + # `categories` is now just the `gantt_data` keys. + # replace with: `list(gantt_data.keys())` or remove entirely + categories = [] + for y in [x["category"] for x in gantt_data]: + if y not in categories: + categories.append(y) + return gantt_data, categories + + def sort_lipids(self): + """ + Sort lipid contacts according to their contact frequency, all the while keeping track + of residue IDs, and number of contacts with each residue. + + Args: + ts (PL2): The ProLint2 object. + + Returns: + lipid_frequency (dict): Stores lipid IDs and their contact frequency, sorted in descending order + residue_contact_freq (dict): For each lipid ID, stores the residues in contact and the corresponding + frequency. + + """ + + def sort_by_frequency(contact_list): + contact_list.sort(key=lambda x: x[1], reverse=True) + return contact_list + + # TODO: + # top lipid number should be put in the config. 
+ # contact_threshold = self.ts.trajectory.n_frames * 0.05 + contact_threshold = 0 + + # Initialize dictionaries to store values: + lipid_frequency = {lipid: {} for lipid in self.ts.database.unique_resnames} + residue_contact_freq = {} + + for residue, lipid_contacts in self.contacts.compute_metric('sum').items(): + for lipid, contact_counter in lipid_contacts.items(): + # Sort the contact_counter dictionary by its values + sorted_contacts = sorted(contact_counter.items(), key=lambda x: x[1], reverse=True) + + for lipid_id, freq in sorted_contacts: + # Exclude short-lived contacts + if freq <= contact_threshold: + continue + + # Update lipid_frequency + if lipid_id in lipid_frequency[lipid]: + lipid_frequency[lipid][int(lipid_id)] += freq + else: + lipid_frequency[lipid][int(lipid_id)] = freq + + # Update residue_contact_freq + if int(lipid_id) in residue_contact_freq: + residue_contact_freq[int(lipid_id)].append((int(residue), freq)) + else: + residue_contact_freq[int(lipid_id)] = [(int(residue), freq)] + + for lipid, values in lipid_frequency.items(): + lipid_frequency[lipid] = Counter(values).most_common() + + for lipid_id, vals in residue_contact_freq.items(): + residue_contact_freq[lipid_id] = sort_by_frequency(vals) + + return lipid_frequency, residue_contact_freq + + + @staticmethod + def get_frame_contact_intervals(frames, tolerance=6): + """ + Get the intervals of frames in which a contact is present. + + Args: + frames (list): A list of frames in which a contact is present. + tolerance (int): The number of frames to tolerate before considering a new interval. + + Returns: + ranges_collect (list): A list of tuples containing the start and end frames of each + interval. 
+ + """ + ranges_collect = [] + range_start = 0 + for ix, el in enumerate(frames): + if ix == 0: + range_start = el continue - gantt_data.append( - { - # "category": f'{res}', - "category": res, - "startFrame": start, - "endFrame": end, - "lipid_id": lipid_id, - } - ) - - # TODO: - # `categories` is now just the `gantt_data` keys. - # replace with: `list(gantt_data.keys())` or remove entirely - categories = [] - for y in [x["category"] for x in gantt_data]: - if y not in categories: - categories.append(y) - return gantt_data, categories - - -@route("/static/") -def server_static(filepath): - return static_file(filepath, root=os.path.join(SERVER_PATH, "static")) - - -@route("/") -def index(): - return template(os.path.join(SERVER_PATH, "home.tpl")) - -@route("/app") -def app(): - return static_file("index.html", root=SERVER_PATH) - - -@route("/prolint2") -def prolint2(): - import sys - - print(request.body.getvalue().decode("utf-8"), file=sys.stdout) - return request.body - - -@route("/toplipids/:metadata") -def top_lipid_listener(metadata): - global BACKEND_DATA - - metadata = ast.literal_eval(metadata) - lipid_id = metadata["lipidID"] - - gantt_data, categories = get_gantt_app_data( - BACKEND_DATA["lipid_contact_frames"], lipid_id - ) - # This will sort the residues - # sorted_gantt_data = sorted(gantt_data, key=lambda d: d['category']) - - # ags = TS.query.selected.select_atoms(f'resid {" ".join([str(x) for x in categories])}') - # labeled_categories = [[int(x.resid), x.resname] for x in ags] - return { - "ganttData": gantt_data, - "topLipids": categories, - } - - -@route("/distance/:metadata") -def distance_array_listener(metadata): - global BACKEND_DATA - global TS + prev_el = frames[ix - 1] + if not el - tolerance <= prev_el: + ranges_collect.append((range_start, prev_el)) + range_start = el + if ix == len(frames) - 1: + ranges_collect.append((range_start, el)) + return ranges_collect + + def serve_data(self, metadata): + """ + Serve the data to the client for 
use with the application. + """ + + metadata = ast.literal_eval(metadata) + + lipid = metadata["lipid"] + protein = metadata["protein"] + metric = metadata.get('metric', '') + + if lipid == "" and protein == "": + # Starting setup: + lipid = self.backend_data["lipids"][0] + protein = self.backend_data["proteins"][0] + + table_data = [] + for ix, (lipid_id, freq) in enumerate(self.backend_data["top_lipids"][lipid]): + table_data.append({"id": ix, "lipidID": lipid_id, "contactFrequency": freq}) + + # Initiate ganttApp with the top lipid data + lipid_id = self.backend_data["top_lipids"][lipid][0][0] + gantt_data, categories = self.get_gantt_app_data( + self.backend_data["lipid_contact_frames"], lipid_id + ) - metadata = ast.literal_eval(metadata) - lipid_id = metadata["lipidID"] - residue_id = int(metadata["residueID"]) + # Initiate heatmapApp with the top residue + residue_id = self.backend_data["lipid_contact_frames"][lipid_id][0][0] + ri = SerialDistances( + self.ts.query.universe, + self.ts.query, + self.ts.database, + lipid_id, + residue_id, + self.contacts.contact_frames[residue_id][lipid_id], + ) + ri.run(verbose=False) + + hm_data, la_data = [], [] + for lx, la in enumerate(ri.lipid_atomnames): + la_data.append({"LipidAtoms": la}) + for rx, ra in enumerate(ri.resid_atomnames): + v = ri.distance_array[lx, rx] + hm_data.append({"LipidAtoms": la, "ResidueAtoms": ra, "value": float(v)}) + ra_data = [{"ResidueAtoms": x} for x in ri.resid_atomnames] + + # TODO: + # Possibly, avoid single point of failure on these dictionary lookups? 
+ response = { + "data": self.backend_data["data"][protein][lipid], + "proteins": self.backend_data["proteins"], + "lipids": self.backend_data["lipids"], + "pieData": self.backend_data["pie_data"], + "ganttData": gantt_data, + "topLipids": categories, + "globalTopLipids": self.backend_data["top_lipids"], + "lipidContactFrames": self.backend_data["lipid_contact_frames"], + "tableData": table_data, + "heatmapData": hm_data, + "lipidAtomsData": la_data, + "residueAtomsData": ra_data, + "frameNumber": self.ts.trajectory.n_frames, + } + self.response = response + return response + + def update_metric(self, metadata): + """ + Update the metric used for the backend data. + """ + metadata = ast.literal_eval(metadata) + + lipid = metadata["lipid"] + metric = metadata["metric"] + + residue_contacts = self.payload.residue_contacts(lipid_type=lipid, metric=metric) + + self.response['data'] = residue_contacts[lipid] + return self.response + + + def serve_network(self, metadata): + """ + Serve the data to the client for use with the network application. 
+ """ + metadata = ast.literal_eval(metadata) + lipid = metadata['lipid'] + + top_lipid_ids = [x[0] for x in self.backend_data['top_lipids'][lipid]] + chord_elements, hidden_node_indices, per_lipid_nodes = contact_chord( + self.ts, + self.contacts, + top_lipid_ids, + self.backend_data['lipid_contact_frames'], + cutoff=100 + ) - ri = SerialDistances( - TS.query.selected.universe, - TS.query.selected, - TS.database.selected, - lipid_id, - residue_id, - TS.contacts.contact_frames[f"{residue_id},{lipid_id}"], - ) - ri.run(verbose=False) + return { + "chordElements": chord_elements, + "positionResidues": hidden_node_indices, + "lipidNodes": per_lipid_nodes + } - hm_data, la_data = [], [] - for lx, la in enumerate(ri.lipid_atomnames): - la_data.append({"LipidAtoms": la}) - for rx, ra in enumerate(ri.resid_atomnames): - v = ri.distance_array[lx, rx] - hm_data.append({"LipidAtoms": la, "ResidueAtoms": ra, "value": float(v)}) - ra_data = [{"ResidueAtoms": x} for x in ri.resid_atomnames] + def serve_table_data(self, metadata): + """ + Serve the data to the client for use with the table application. + """ + metadata = ast.literal_eval(metadata) + lipid = metadata["lipid"] - return { - "heatmapData": hm_data, - "lipidAtomsData": la_data, - "residueAtomsData": ra_data, - } + table_data = [] + for ix, (lipid_id, freq) in enumerate(self.backend_data["top_lipids"][lipid]): + table_data.append({"id": ix, "lipidID": lipid_id, "contactFrequency": freq}) + return { + "tableData": table_data, + } -@route("/tabledata/:metadata") -def table_listener(metadata): - global BACKEND_DATA + def serve_top_lipids(self, metadata): + """ + Serve the data to the client for use with the gantt application. 
+ """ + metadata = ast.literal_eval(metadata) + lipid_id = metadata["lipidID"] - metadata = ast.literal_eval(metadata) - lipid = metadata["lipid"] + gantt_data, categories = self.get_gantt_app_data( + self.backend_data["lipid_contact_frames"], lipid_id + ) - table_data = [] - for ix, (lipid_id, freq) in enumerate(BACKEND_DATA["top_lipids"][lipid]): - table_data.append({"id": ix, "lipidID": lipid_id, "contactFrequency": freq}) + return { + "ganttData": gantt_data, + "topLipids": categories, + } - return { - "tableData": table_data, - } + def serve_distance_array(self, metadata): + """ + Serve the data to the client for use with the heatmap application. + """ + metadata = ast.literal_eval(metadata) + lipid_id = metadata["lipidID"] + residue_id = int(metadata["residueID"]) + + ri = SerialDistances( + self.ts.query.universe, + self.ts.query, + self.ts.database, + lipid_id, + residue_id, + self.contacts.contact_frames[residue_id][lipid_id], + ) + ri.run(verbose=False) + + hm_data, la_data = [], [] + for lx, la in enumerate(ri.lipid_atomnames): + la_data.append({"LipidAtoms": la}) + for rx, ra in enumerate(ri.resid_atomnames): + v = ri.distance_array[lx, rx] + hm_data.append({"LipidAtoms": la, "ResidueAtoms": ra, "value": float(v)}) + ra_data = [{"ResidueAtoms": x} for x in ri.resid_atomnames] + + return { + "heatmapData": hm_data, + "lipidAtomsData": la_data, + "residueAtomsData": ra_data, + } + def start_server(self, payload=None): + """ + Start the server. 
+ """ + if payload is None: + import sys # pylint: disable=import-outside-toplevel + print ("Please provide a payload") + sys.exit(1) -@route("/pdb/:metadata") -def blob(metadata): + self.args = payload + self.ts = Universe(self.args.structure, self.args.trajectory) - global ARGS - u = mda.Universe(ARGS.structure, ARGS.trajectory) - protein = u.select_atoms("protein") - pstream = mda.lib.util.NamedStream(StringIO(), "dummy.pdb") - with mda.Writer(pstream, format="PDB") as w: - w.write(protein) + if self.args.i_bool: + self.ts = interactive_selection(self.ts) + self.contacts = self.ts.compute_contacts(cutoff=self.args.cutoff) - return pstream.read() + if self.args.e_file: + self.contacts.export(self.args.e_file) -@route('/network/:metadata') -def network_listener(metadata): - global BACKEND_DATA - global TS - - metadata = ast.literal_eval(metadata) - lipid = metadata['lipid'] + # payload = self.contacts.server_payload() + self.payload = ServerPayload(self.contacts, self.ts) + payload = self.payload.payload - top_lipid_ids = [x[0] for x in BACKEND_DATA['top_lipids'][lipid]] - chord_elements, hidden_node_indices, per_lipid_nodes = contact_chord( - TS, - top_lipid_ids, - BACKEND_DATA['lipid_contact_frames'], - cutoff=100 - ) + lipid_frequency, residue_contact_freq = self.sort_lipids() + payload["top_lipids"] = lipid_frequency + payload["lipid_contact_frames"] = residue_contact_freq - return { - "chordElements": chord_elements, - "positionResidues": hidden_node_indices, - "lipidNodes": per_lipid_nodes - } - -@route('/data/:metadata') -def listener(metadata): - - global data_loaded - global data - global BACKEND_DATA - global TS - - # TODO: - # Bottle should provide the metadata already, - # perhaps via the following: - # from bottle import response, request - metadata = ast.literal_eval(metadata) - - lipid = metadata["lipid"] - protein = metadata["protein"] - - if lipid == "" and protein == "": - # Starting setup: - try: - lipid = BACKEND_DATA["lipids"][0] - protein = 
BACKEND_DATA["proteins"][0] - except: - print("Detached EXECUTION") - print("This is currently meant for testing only. Not guaranteed to work!") - BACKEND_DATA = independent_execution() - lipid = BACKEND_DATA["lipids"][0] - protein = BACKEND_DATA["proteins"][0] - - table_data = [] - for ix, (lipid_id, freq) in enumerate(BACKEND_DATA["top_lipids"][lipid]): - table_data.append({"id": ix, "lipidID": lipid_id, "contactFrequency": freq}) - - # Initiate ganttApp with the top lipid - lipid_id = BACKEND_DATA["top_lipids"][lipid][0][0] - gantt_data, categories = get_gantt_app_data( - BACKEND_DATA["lipid_contact_frames"], lipid_id - ) - - # Initiate heatmapApp with the top residue - residue_id = BACKEND_DATA["lipid_contact_frames"][lipid_id][0][0] - ri = SerialDistances( - TS.query.selected.universe, - TS.query.selected, - TS.database.selected, - lipid_id, - residue_id, - TS.contacts.contact_frames[f"{residue_id},{lipid_id}"], - ) - ri.run(verbose=False) - - hm_data, la_data = [], [] - for lx, la in enumerate(ri.lipid_atomnames): - la_data.append({"LipidAtoms": la}) - for rx, ra in enumerate(ri.resid_atomnames): - v = ri.distance_array[lx, rx] - hm_data.append({"LipidAtoms": la, "ResidueAtoms": ra, "value": float(v)}) - ra_data = [{"ResidueAtoms": x} for x in ri.resid_atomnames] - - # TODO: - # Possibly, avoid single point of failure on these dictionary lookups? 
- response = { - "data": BACKEND_DATA["data"][protein][lipid], - "proteins": BACKEND_DATA["proteins"], - "lipids": BACKEND_DATA["lipids"], - "pieData": BACKEND_DATA["pie_data"], - "ganttData": gantt_data, - "topLipids": categories, - "globalTopLipids": BACKEND_DATA["top_lipids"], - "lipidContactFrames": BACKEND_DATA["lipid_contact_frames"], - "tableData": table_data, - "heatmapData": hm_data, - "lipidAtomsData": la_data, - "residueAtomsData": ra_data, - "frameNumber": TS.n_frames, - } - return response - - -def start_server( - payload=None, debug_bool=False, reloader=True, port=8351, i_bool=True, e_file=False -): - - global ARGS - # ProLint2 calls: - args = payload - ARGS = args - ts = PL2(args.structure, args.trajectory, add_lipid_types=args.other_lipids) - # For interactive selection of the groups for the contacts calculation - if i_bool: - ts = interactive_selection(ts) - ts.contacts.compute(cutoff=args.cutoff) - - # for exporting the data - if e_file: - ts.contacts.export(args.e_file) - - payload = ts.contacts.server_payload() - - t, g = sort_lipids(ts) - payload["top_lipids"] = t - payload["lipid_contact_frames"] = g - - # Make data accessible globally - global BACKEND_DATA - global TS - BACKEND_DATA = payload - TS = ts - - debug(debug_bool) - run(reloader=reloader, host="localhost", port=port) - - -def independent_execution(): - """ - If we are not calling the server through the prolint2 executable, but - independently, locally for testing purposes, we will load local data file - and serve that to the dashboard. 
- """ - with open(os.path.join(SERVER_PATH, "girk.json"), "r") as fp: - data = json.load(fp) - - pie_data = [ - { - "category": "LocalGirk", - "value": 500, - "subData": [ - {"category": "CHOL", "value": 300}, - {"category": "POPE", "value": 150}, - {"category": "POPS", "value": 50}, - ], - } - ] - - # ganttApp data input requirement - gantt_data = [ - { - "category": "Lipid 1", - "startFrame": 0, - "endFrame": 10, - }, - { - "category": "Lipid 1", - "startFrame": 45, - "endFrame": 75, - }, - { - "category": "Lipid 1", - "startFrame": 90, - "endFrame": 100, - }, - { - "category": "Lipid 2", - "startFrame": 10, - "endFrame": 35, - }, - { - "category": "Lipid 2", - "startFrame": 45, - "endFrame": 60, - }, - ] - top_10_lipids = ["Lipid 1", "Lipid 2"] - - payload = { - "data": data, - "proteins": ["LocalGirk"], - "lipids": list(data["LocalGirk"].keys()), - "pie_data": pie_data, - "gantt_data": gantt_data, - "top_10_lipids": top_10_lipids, - } - - return payload + self.backend_data = payload + self.app.run(reloader=self.reloader, host="localhost", port=self.port, debug=self.debug_bool) if __name__ == "__main__": - start_server(debug_bool=True) + app = ProLintDashboard(debug_bool=True) + app.start_server() diff --git a/prolint2/server/static/css/main.css b/prolint2/server/static/css/main.css index bc55944..74e69e8 100644 --- a/prolint2/server/static/css/main.css +++ b/prolint2/server/static/css/main.css @@ -76,7 +76,28 @@ place-content: center; background: #DAEDFB; } */ -button { +.dropdown-wrapper { + display: flex; + align-items: center; + top: 15px; +} + +.dropdown-label { + margin-right: 0.5em; + font-family: system-ui, sans-serif; + font-weight: bold; + font-size: 1.5rem; + color: var(--c); +} + +.custom-dropdown { + width: 250px; + appearance: none; + /* Add the remaining styles from your previous custom-select rule */ +} + + +button, select { --b: 1.5px; /* border thickness */ --s: 0.02em; /* size of the corner */ --c: #bd5532; @@ -96,7 +117,9 @@ button { 
outline-offset: 0.2em; } button:hover, -button:focus-visible { +button:focus-visible, +select:hover, +select:focus-visible { --_p: 0px; outline-color: var(--c); outline-offset: 0.05em; @@ -106,7 +129,7 @@ button:active { color: #fff; } -button { +button, select { font-family: system-ui, sans-serif; font-weight: bold; font-size: 1.5rem; @@ -114,3 +137,17 @@ button { border: none; margin: 0.1em; } + +select option { + background-color: #ffffff; + color: var(--c); + font-family: system-ui, sans-serif; + font-weight: bold; + font-size: 1.5rem; + padding: 0.1em 0.3em; +} + +select option:hover { + background-color: var(--c); + color: #ffffff; +} diff --git a/prolint2/server/static/js/app.js b/prolint2/server/static/js/app.js index 376268b..253ce3a 100644 --- a/prolint2/server/static/js/app.js +++ b/prolint2/server/static/js/app.js @@ -17,6 +17,7 @@ import { heatmapApp } from "./heatmapApp.js"; import { pieApp } from "./pie.js"; import { tableApp } from "./table.js"; import { timeSeriesApp } from "./timeseries.js"; +import { getTimeData } from "./timeseries.js"; // NOTES (and TODO:) // 1. 
// The network app is always redrawn (rather than updated) @@ -28,53 +29,103 @@ import { timeSeriesApp } from "./timeseries.js"; // Fetch the data from the backend var obj = { - "lipid": "", - "protein": "" -} - -fetch('/data/' + JSON.stringify(obj)) - .then(response => response.json()) - .then(responseData => { - - console.log('responseData', responseData) - - var contactData = responseData['data']; - var lipids = responseData['lipids']; - - var rootReferenceObjects = radarApp() - - var heatmap = heatmapApp(responseData); - var timeSeries = timeSeriesApp(contactData); - var ganttReturnValue = ganttApp(responseData, heatmap); - var networkRootReference = networkApp(subSeries, lipids[0]) - var table = tableApp(responseData, ganttReturnValue, heatmap, networkRootReference) - - rootReferenceObjects["series"].data.setAll(contactData); - rootReferenceObjects["categoryAxis"].data.setAll(contactData); - rootReferenceObjects["createRange"](lipids[0], contactData, 0); - - rootReferenceObjects["series"].appear(100); - rootReferenceObjects["chart"].appear(100); - - var [pieRoot, subSeries] = pieApp(table, ganttReturnValue, heatmap, timeSeries, networkRootReference, responseData, rootReferenceObjects); - - /////////////////////////////////////////// - ////////////// Hide Logos ///////////////// - /////////////////////////////////////////// - const logos_to_keep_for_ids = ["chartdiv_x", "chartdiv_y", "chartdiv_z"] - am5.array.each(am5.registry.rootElements, function (rootElement) { - if (logos_to_keep_for_ids.includes(rootElement.dom.id)) { - return - } - rootElement.events.on("framestarted", function () { - var rootChildren = rootElement.tooltipContainer.allChildren() - for (let ix = 0; ix < rootChildren.length; ix++) { - var el = rootChildren[ix]; - if (el._settings.tooltipText == "Created using amCharts 5") { - el.set('visible', false) - } + lipid: "", + protein: "", +}; + +fetch("/data/" + JSON.stringify(obj)) + .then((response) => response.json()) + .then((responseData) => 
{ + + var contactData = responseData["data"]; + console.log('responseData', responseData) + var lipids = responseData["lipids"]; + + var rootReferenceObjects = radarApp(); + + var heatmap = heatmapApp(responseData); + var timeSeries = timeSeriesApp(contactData); + var ganttReturnValue = ganttApp(responseData, heatmap); + var networkRootReference = networkApp(subSeries, lipids[0]); + var table = tableApp( + responseData, + ganttReturnValue, + heatmap, + networkRootReference + ); + + rootReferenceObjects["series"].data.setAll(contactData); + rootReferenceObjects["categoryAxis"].data.setAll(contactData); + rootReferenceObjects["createRange"](lipids[0], contactData, 0); + + rootReferenceObjects["series"].appear(100); + rootReferenceObjects["chart"].appear(100); + + var [pieRoot, subSeries] = pieApp( + table, + ganttReturnValue, + heatmap, + timeSeries, + networkRootReference, + responseData, + rootReferenceObjects + ); + + function updateMetric() { + subSeries.slices.values.forEach(function (slice) { + if (slice.get("active")) { + + var lipid = slice.dataItem.dataContext.category; + obj.protein = "Protein"; + obj.lipid = lipid; + obj.metric = document.getElementById("metric_button").value; + + fetch("/metric/" + JSON.stringify(obj)) + .then((response) => response.json()) + .then((responseData) => { + var updateData = responseData["data"]; + + rootReferenceObjects["series"].data.setAll(updateData); + rootReferenceObjects["categoryAxis"].data.setAll(updateData); + rootReferenceObjects["createRange"](lipid, updateData, 0); + + am5.array.each(subSeries.dataItems, function (dataItem, ix) { + if (dataItem.dataContext.category == lipid) { + var col = subSeries.get("colors").getIndex(ix); + rootReferenceObjects["axisRange"] + .get("axisFill") + .set("fill", col); } + }); + + var [xAxis, series] = timeSeries; + var timeData = getTimeData(updateData); + xAxis.data.setAll(timeData); + series.data.setAll(timeData); }); - }); + } + }); + } + + var selectElement = 
document.getElementById("metric_button"); + selectElement.addEventListener("change", updateMetric); - }); \ No newline at end of file + /////////////////////////////////////////// + ////////////// Hide Logos ///////////////// + /////////////////////////////////////////// + const logos_to_keep_for_ids = ["chartdiv_x", "chartdiv_y", "chartdiv_z"]; + am5.array.each(am5.registry.rootElements, function (rootElement) { + if (logos_to_keep_for_ids.includes(rootElement.dom.id)) { + return; + } + rootElement.events.on("framestarted", function () { + var rootChildren = rootElement.tooltipContainer.allChildren(); + for (let ix = 0; ix < rootChildren.length; ix++) { + var el = rootChildren[ix]; + if (el._settings.tooltipText == "Created using amCharts 5") { + el.set("visible", false); + } + } + }); + }); + }); diff --git a/prolint2/server/static/js/pie.js b/prolint2/server/static/js/pie.js index 6791f16..7fa07bb 100644 --- a/prolint2/server/static/js/pie.js +++ b/prolint2/server/static/js/pie.js @@ -161,7 +161,8 @@ export function pieApp(table, ganttReturnValue, heatmap, timeSeries, networkRoot // Update Circular App Data obj.protein = "Protein" obj.lipid = lipid - fetch('/data/' + JSON.stringify(obj)) + obj.metric = document.getElementById("metric_button").value; + fetch('/metric/' + JSON.stringify(obj)) .then(response => response.json()) .then(responseData => { diff --git a/prolint2/server/static/js/table.js b/prolint2/server/static/js/table.js index d238126..c996a36 100644 --- a/prolint2/server/static/js/table.js +++ b/prolint2/server/static/js/table.js @@ -34,6 +34,10 @@ export function tableApp(responseData, ganttReturnValue, heatmap, networkRootRef hozAlign: "center", headerSort: false, resizable: false, + formatter: function(cell) { + return parseFloat(cell.getValue()).toFixed(2); + }, + }, ] }], diff --git a/prolint2/server/static/js/timeseries.js b/prolint2/server/static/js/timeseries.js index 60695b1..357d419 100644 --- a/prolint2/server/static/js/timeseries.js +++ 
b/prolint2/server/static/js/timeseries.js @@ -140,7 +140,7 @@ export function timeSeriesApp(contactData) { }); }); - series.children.moveValue(series.bulletsContainer, 0); + series.children.moveValue(series.bulletsContainer, series.children.length + 1); series.on("tooltipDataItem", function (tooltipDataItem) { diff --git a/prolint2/server/utils.py b/prolint2/server/utils.py index 5b29446..ae3c1cf 100644 --- a/prolint2/server/utils.py +++ b/prolint2/server/utils.py @@ -22,14 +22,14 @@ def get_frame_contact_intervals(frames, tolerance=6): ranges_collect.append((range_start, el)) return ranges_collect -def calculate_contact_intervals(TS, g, lipid_id, residues_to_show=15, intervals_to_filter_out=10): +def calculate_contact_intervals(contacts, g, lipid_id, residues_to_show=15, intervals_to_filter_out=10): """ TODO: write doc """ contact_intervals = {} for res, _ in g[lipid_id][:residues_to_show]: - frame_numbers = TS.contacts.contact_frames[f'{res},{lipid_id}'] + frame_numbers = contacts.contact_frames[res][lipid_id] frame_intervals = get_frame_contact_intervals(frame_numbers) for start, end in frame_intervals: if end - start < intervals_to_filter_out: @@ -42,14 +42,14 @@ def calculate_contact_intervals(TS, g, lipid_id, residues_to_show=15, intervals_ return contact_intervals -def amCharts_contact_intervals(TS, g, lipid_id, residues_to_show=15, intervals_to_filter_out=10): +def amCharts_contact_intervals(contacts, g, lipid_id, residues_to_show=15, intervals_to_filter_out=10): """ TODO: write doc """ contact_intervals = [] for res, _ in g[lipid_id][:residues_to_show]: - frame_numbers = TS.contacts.contact_frames[f'{res},{lipid_id}'] + frame_numbers = contacts.contact_frames[res][lipid_id] frame_intervals = get_frame_contact_intervals(frame_numbers) for start, end in frame_intervals: if end - start < intervals_to_filter_out: diff --git a/prolint2/tests/test_contacts.py b/prolint2/tests/test_contacts.py deleted file mode 100644 index f6347aa..0000000 --- 
a/prolint2/tests/test_contacts.py +++ /dev/null @@ -1,122 +0,0 @@ -import numpy as np -import pytest -import MDAnalysis as mda -from prolint2.sampledata import GIRK -import re -from prolint2 import QueryProteins, MembraneDatabase -from prolint2.contacts import SerialContacts, SerialDistances, Contacts - -@pytest.fixture -def universe(): - return mda.Universe(GIRK.coordinates, GIRK.trajectory) - -def test_init_exception(universe): - query = universe.select_atoms("resname LYS") - database = universe.select_atoms("") - cutoff = 8.0 - - with pytest.raises(ValueError, match=re.escape("Invalid selection. Empty AtomGroup(s).")): - contacts = SerialContacts(universe, query, database, cutoff) - - query = universe.select_atoms("") - database = universe.select_atoms("resname CHOL") - cutoff = 8.0 - - with pytest.raises(ValueError, match=re.escape("Invalid selection. Empty AtomGroup(s).")): - contacts = SerialContacts(universe, query, database, cutoff) - - query = universe.select_atoms("resname LYS") - database = universe.select_atoms("resname CHOL") - cutoff = -8.0 - - with pytest.raises(ValueError, match=re.escape("The cutoff must be greater than 0.")): - contacts = SerialContacts(universe, query, database, cutoff) - -def test_single_frame(universe): - query = universe.select_atoms("resname LYS") - database = universe.select_atoms("resname CHOL") - cutoff = 8.0 - - sc = SerialContacts(universe, query, database, cutoff) - sc._prepare() - # sc._single_frame() - - # Test if the right number of contacts is computed - # assert len(sc.contacts) == len(query) - for k in sc.contacts: - assert len(sc.contacts[k]) == len(np.unique(database.resnames)) - - # Test if the contact_frames attribute is correctly computed - assert len(sc.contact_frames) <= len(query) * len(database) - for k, v in sc.contact_frames.items(): - residue_id, lipid_id = map(int, k.split(',')) - lipid_name = database[lipid_id].resname - assert residue_id in sc.contacts - assert lipid_name in sc.contacts[residue_id] 
- assert lipid_id in sc.contacts[residue_id][lipid_name] - assert len(v) <= len(universe.universe.trajectory) - for frame in v: - assert frame in universe.universe.trajectory.frame_indices - -def test_SerialDistances_instance(universe): - query = universe.select_atoms("resname LYS") - database = universe.select_atoms("resname CHOL") - lipid_id = 0 - residue_id = 0 - frame_filter = np.array([0, 1, 2]) - sd = SerialDistances(universe, query, database, lipid_id, residue_id, frame_filter) - assert isinstance(sd, mda.analysis.base.AnalysisBase) - -def test_SerialDistances_empty_atomgroup_selection(universe): - query = universe.select_atoms("protein") - database = universe.select_atoms("") - lipid_id = 0 - residue_id = 0 - frame_filter = np.array([0, 1, 2]) - with pytest.raises(ValueError, match="Invalid selection. Empty AtomGroup\(s\)."): - sd = SerialDistances(universe, query, database, lipid_id, residue_id, frame_filter) - -def test_SerialDistances_prepare(universe): - query = universe.select_atoms("resname LYS") - database = universe.select_atoms("resname CHOL") - lipid_id = 0 - residue_id = 0 - frame_filter = np.array([0, 1, 2]) - sd = SerialDistances(universe, query, database, lipid_id, residue_id, frame_filter) - sd._prepare() - assert isinstance(sd.result_array, np.ndarray) - assert sd.result_array.shape == (3, len(sd.lipid_atomgroup), len(sd.resid_atomgroup)) - -def test_SerialDistances_single_frame(universe): - query = universe.select_atoms("resname LYS") - database = universe.select_atoms("resname CHOL") - lipid_id = 0 - residue_id = 0 - frame_filter = np.array([0, 1, 2]) - sd = SerialDistances(universe, query, database, lipid_id, residue_id, frame_filter) - sd._prepare() - # sd._single_frame() - assert isinstance(sd.result_array[0], np.ndarray) - -def test_init_method_sets_correct_attributes(universe): - query = universe.select_atoms("resname LYS") - database = universe.select_atoms("resname CHOL") - query = QueryProteins(query) - database = 
MembraneDatabase(database) - contacts = Contacts(query, database) - - assert contacts.query == query - assert contacts.database == database - assert contacts.cutoff is None - assert contacts.contacts is None - -def test_compute_method_sets_cutoff_attribute(universe): - query = universe.select_atoms("resname LYS") - database = universe.select_atoms("resname CHOL") - query = QueryProteins(query) - database = MembraneDatabase(database) - contacts = Contacts(query, database) - contacts.compute(7) - - assert contacts.cutoff == 7 - diff --git a/prolint2/tests/test_prolint2.py b/prolint2/tests/test_prolint2.py deleted file mode 100644 index b7e2484..0000000 --- a/prolint2/tests/test_prolint2.py +++ /dev/null @@ -1,135 +0,0 @@ -""" -Unit tests for the prolint2 package. -""" - -# Import package, test suite, and other packages as needed -import sys -import configparser -import numpy as np -from prolint2 import get_config -import MDAnalysis as mda -from prolint2 import PL2 -from prolint2 import MembraneDatabase -from prolint2 import QueryProteins -from prolint2.sampledata import GIRK -# Getting the config file -config = configparser.ConfigParser(allow_no_value=True) -config.read(get_config()) -parameters_config = config["Parameters"] - - -def test_prolint2_imported(): - """Sample test, will always pass so long as import statement worked.""" - assert "prolint2" in sys.modules - -def test_prolint2_initialize(): - """Initialize test, will always pass so long as the prolint2.PL2 object can be initialized.""" - assert PL2(GIRK.coordinates, GIRK.trajectory) - -def test_init(): - structure = GIRK.coordinates - trajectory = GIRK.trajectory - add_lipid_types = [] - pl2 = PL2(structure, trajectory, add_lipid_types) - - # Test that the AtomGroup and ResidueGroup have been created - assert isinstance(pl2.atoms, mda.core.groups.AtomGroup) - assert isinstance(pl2.residues, mda.core.groups.ResidueGroup) - -def test_membrane_residues(): - structure = GIRK.coordinates - trajectory = 
GIRK.trajectory - add_lipid_types = [] - pl2 = PL2(structure, trajectory, add_lipid_types) - - # Test that the correct lipid residues have been added to the macros attribute - lipid_types = parameters_config["lipid_types"].split(", ") - lipid_types = lipid_types + add_lipid_types - not_protein_restypes = np.unique(pl2.atoms.select_atoms("not protein").residues.resnames) - membrane_restypes = [] - for type in lipid_types: - if type in not_protein_restypes: - membrane_restypes.append("resname " + type) - if len(membrane_restypes) == 1: - membrane_sel = membrane_restypes[0] - elif len(membrane_restypes) > 1: - membrane_sel = membrane_restypes[0] - for type in membrane_restypes[1:]: - membrane_sel = membrane_sel + " or " + type - else: - print("There are not lipid residues in your system") - - membrane_residues = pl2.atoms.select_atoms(membrane_sel).residues - for residue in membrane_residues: - assert residue.macro == "membrane" - -def test_protein_residues(): - structure = GIRK.coordinates - trajectory = GIRK.trajectory - add_lipid_types = [] - pl2 = PL2(structure, trajectory, add_lipid_types) - - # Test that the correct protein residues have been added to the macros attribute - protein_sel = "protein" - if ( - len(pl2.atoms.select_atoms(protein_sel).segments) > 1 - and pl2.atoms.select_atoms(protein_sel).segments.n_atoms - == pl2.atoms.select_atoms(protein_sel).n_atoms - ): - for segment_idx in range(len(pl2.atoms.select_atoms(protein_sel).segments)): - protein_segment = pl2.atoms.select_ - -# def test_lipid_types(): -# whole = mda.Universe.empty(n_atoms=0, n_residues=0, atom_resindex=([])) -# whole.add_TopologyAttr( -# "resnames", np.array([]) -# ) -# whole.add_TopologyAttr("resids", np.array([])) -# database = MembraneDatabase(whole) -# lipids = database.lipid_types() -# assert len(lipids) == 0 - -# # create an AtomGroup with two lipids -# whole = mda.Universe.empty(n_atoms=6, n_residues=2, atom_resindex=([0, 0, 0, 1, 1, 1])) -# whole.add_TopologyAttr( -# 
"resnames", np.array(["POPC", "POPG"]) -# ) -# whole.add_TopologyAttr("resids", np.array([0, 1])) -# database = MembraneDatabase(whole) -# lipids = database.lipid_types() -# assert len(lipids) == 2 -# assert "POPC" in lipids -# assert "POPG" in lipids - -# def test_lipid_count(): -# # create an AtomGroup with two lipids -# whole = mda.Universe.empty(n_atoms=6, n_residues=2, atom_resindex=([0, 0, 0, 1, 1, 1])) -# whole.add_TopologyAttr( -# "resnames", np.array(["POPC", "POPG"]) -# ) -# whole.add_TopologyAttr("resids", np.array([0, 1])) -# database = MembraneDatabase(whole) -# lipid_count = database.lipid_count() -# assert len(lipid_count) == 2 - -# def test_str_repr_MembraneDatabase(): -# whole = mda.Universe.empty(n_atoms=0) -# database = MembraneDatabase(whole) -# assert str(database) == "" -# assert repr(database) == "" - -# whole = mda.Universe.empty(n_atoms=6) -# database = MembraneDatabase(whole) -# assert str(database) == "" -# assert repr(database) == "" - -# def test_str_and_repr_QueryProteins(): -# whole = mda.Universe.empty(n_atoms=0) -# database = QueryProteins(whole) -# assert str(database) == "" -# assert repr(database) == "" - -# whole = mda.Universe.empty(n_atoms=6) -# database = QueryProteins(whole) -# assert str(database) == "" -# assert repr(database) == "" \ No newline at end of file diff --git a/prolint2/utils/__init__.py b/prolint2/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/prolint2/utils/utils.py b/prolint2/utils/utils.py new file mode 100644 index 0000000..e0badd2 --- /dev/null +++ b/prolint2/utils/utils.py @@ -0,0 +1,48 @@ +import numpy as np + +def fast_unique_comparison(residue_ids, lipid_ids, lipid_names): + """ + Get the unique combinations of residue and lipid ids. Vectorized implementation. + + Parameters + ---------- + residue_ids : np.ndarray + Array of residue ids. + lipid_ids : np.ndarray + Array of lipid ids. + lipid_names : np.ndarray + Array of lipid names. 
+ + Returns + ------- + np.ndarray: Array of unique combinations of residue and lipid ids. + """ + # Combine the arrays into a single 2D array + combined_array = np.stack((residue_ids, lipid_ids), axis=-1) + + # Get lexicographically sorted indices + lex_sorted_indices = np.lexsort((combined_array[:, 1], combined_array[:, 0])) + + # Sort the combined array by the sorted indices + sorted_array = combined_array[lex_sorted_indices] + + # Calculate row-wise differences between consecutive sorted rows + row_diffs = np.diff(sorted_array, axis=0) + + # Find the indices where the differences are non-zero + unique_indices = np.where(np.any(row_diffs != 0, axis=1))[0] + + # Add the first index (0) to unique_indices, as it's always unique + unique_indices = np.concatenate(([0], unique_indices + 1)) + + # Extract the unique rows using the indices + unique_array = sorted_array[unique_indices] + + # Split the unique rows back into residue_ids and lipid_ids + unique_residue_ids, unique_lipid_ids = unique_array[:, 0], unique_array[:, 1] + + # Extract the corresponding lipid_names using the sorted indices + sorted_lipid_names = lipid_names[lex_sorted_indices] + unique_lipid_names = sorted_lipid_names[unique_indices] + + return unique_residue_ids, unique_lipid_ids, unique_lipid_names diff --git a/setup.cfg b/setup.cfg index d325bf4..bf9caab 100644 --- a/setup.cfg +++ b/setup.cfg @@ -6,7 +6,7 @@ omit = # Omit the tests */tests/* # Omit generated versioneer - ufcc/_version.py + prolint2/_version.py [yapf] # YAPF, in .style.yapf files this shows up as "[style]" header