Skip to content

Commit

Permalink
debugging 'export' method
Browse files Browse the repository at this point in the history
  • Loading branch information
danielpastor97 committed Apr 10, 2023
1 parent 78e30a2 commit c295cd8
Showing 1 changed file with 46 additions and 30 deletions.
76 changes: 46 additions & 30 deletions prolint2/contacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,6 @@ def __init__(self, query, database):
self.cutoff = None
self.contacts = None
self.contact_frames = None
self.contacts_df = None
self.metrics = None

# TODO:
Expand Down Expand Up @@ -253,7 +252,7 @@ def compute(self, cutoff=int(parameters_config["cutoff"]), get_metrics=False):
self.contacts = temp_instance.contacts
self.contact_frames = temp_instance.contact_frames
if get_metrics:
self.contacts_df, self.metrics = self.contacts_to_dataframe()
self.metrics = self.contacts_to_metrics()

# this function allows the definition of chunks of frames with uninterrupted interactions
# i.e. it takes a list of frames as [9, 11, 12] and it returns [1, 2]
Expand All @@ -268,18 +267,17 @@ def ranges(self, lst):

def contacts_to_dataframe(self):
"""
Convert the contacts dictionary to a Pandas DataFrame with different metrics.
Convert the contacts dictionary to a Pandas DataFrame.
Returns
-------
Pandas DataFrame
Pandas DataFrame with different metrics for the contacts.
Pandas DataFrame with all the contacts.
"""
if not self.contacts:
raise ValueError("The contacts dictionary is empty.")
else:
results = []
metrics = []
keys = self.contacts.keys()
for idx, protein_resi in enumerate(keys):
for lip_type in self.contacts[protein_resi].keys():
Expand All @@ -299,7 +297,38 @@ def contacts_to_dataframe(self):
fr,
)
)
results_df = pd.DataFrame(
results,
columns=[
"Protein",
"Residue ID",
"Residue Name",
"Lipid Type",
"Lipid ID",
"Frame",
],
)
return results_df

def contacts_to_metrics(self):
"""
Convert the contacts dictionary to a Pandas DataFrame with different metrics.
Returns
-------
Pandas DataFrame
Pandas DataFrame with different metrics for the contacts.
"""
if not self.contacts:
raise ValueError("The contacts dictionary is empty.")
else:
metrics = []
keys = self.contacts.keys()
for idx, protein_resi in enumerate(keys):
for lip_type in self.contacts[protein_resi].keys():
for lip_res, t_frames in self.contacts[protein_resi][
lip_type
].items():
# getting chunks of frames with uninterrupted interactions
key = "{},{}".format(protein_resi, lip_res)
temp = list(self.ranges(self.contact_frames[key]))
Expand All @@ -318,18 +347,6 @@ def contacts_to_dataframe(self):
np.mean(temp),
)
)

results_df = pd.DataFrame(
results,
columns=[
"Protein",
"Residue ID",
"Residue Name",
"Lipid Type",
"Lipid ID",
"Frame",
],
)
metrics_df = pd.DataFrame(
metrics,
columns=[
Expand All @@ -344,8 +361,8 @@ def contacts_to_dataframe(self):
"Mean Duration",
],
)
return metrics_df

return results_df, metrics_df

def export(self, filename):
    """
    Export the contacts and their metrics to CSV files.

    The contacts table is written to ``filename``. The metrics table is
    written next to it, with ``_metrics`` inserted before the file
    extension (``contacts.csv`` -> ``contacts_metrics.csv``).

    Parameters
    ----------
    filename : str
        Name of the file to export the contacts array.

    Raises
    ------
    ValueError
        If the contacts dictionary is empty (raised while building the
        tables).
    """
    import os

    # Insert the suffix before the extension. A plain
    # str.replace(".csv", ...) leaves the name unchanged for any other
    # extension, so the metrics would overwrite the contacts file.
    root, ext = os.path.splitext(filename)
    metrics_filename = "{}_metrics{}".format(root, ext)

    print("Exporting contacts and metrics to files...")
    self.contacts_to_dataframe().to_csv(filename, index=False)

    # Compute the metrics only when they are missing, but always write
    # them: previously already-computed metrics were never exported.
    if not isinstance(self.metrics, pd.DataFrame):
        self.metrics = self.contacts_to_metrics()
    self.metrics.to_csv(metrics_filename, index=False)

    print(
        "Contacts successfully exported to file '{}' and metrics to '{}'!!".format(
            filename, metrics_filename
        )
    )

def filter_by_percentile(self, percentile=0.75, metric="Sum of all contacts"):
"""
Expand Down Expand Up @@ -396,6 +414,8 @@ def server_payload(self, metric="Sum of all contacts"):
} # TODO: we need to generate sub_data for each protein.
js = {protein: {k: [] for k in lipids}}

if not isinstance(self.metrics, pd.DataFrame):
self.metrics = self.contacts_to_metrics()
# get dictionary metrics
metric_dict = (
self.metrics.groupby(["Residue ID", "Lipid Type"])[metric]
Expand Down Expand Up @@ -504,17 +524,13 @@ def server_payload(self, metric="Sum of all contacts"):
return payload

def __str__(self):
    """Return a short summary with the number of entries in ``self.contacts``."""
    # Identity test: None is a singleton, and ``== None`` would defer to
    # whatever __eq__ the stored object defines.
    if self.contacts is None:
        return "<prolint2.Contacts containing 0 contacts>"
    return "<prolint2.Contacts containing {} contacts>".format(len(self.contacts))

def __repr__(self):
    """Return a short summary with the number of entries in ``self.contacts``."""
    # Identity test: None is a singleton, and ``== None`` would defer to
    # whatever __eq__ the stored object defines.
    if self.contacts is None:
        return "<prolint2.Contacts containing 0 contacts>"
    return "<prolint2.Contacts containing {} contacts>".format(len(self.contacts))

0 comments on commit c295cd8

Please sign in to comment.