Skip to content

Commit

Permalink
Merge pull request #146 from mwang87/new-api
Browse files Browse the repository at this point in the history
[Engine] Creating new API and docs for file loading and calling query
  • Loading branch information
mwang87 authored Aug 26, 2021
2 parents 74e09ac + 1ca0b82 commit 46a7b05
Show file tree
Hide file tree
Showing 7 changed files with 130 additions and 9 deletions.
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,18 @@ from massql import msql_engine
results_df = msql_engine.process_query(input_query, input_filename)
```

If you have already loaded the data into data frames, you can pass them in directly instead of having the file loaded again:
```
from massql import msql_engine
from massql import msql_fileloading
# Loading Data
ms1_df, ms2_df = msql_fileloading.load_data(input_filename)
# Executing Query
results_df = msql_engine.process_query(input_query, input_filename, ms1_df=ms1_df, ms2_df=ms2_df)
```

## Web API

```/api```
Expand Down
26 changes: 22 additions & 4 deletions massql/msql_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,26 @@ def _get_da_tolerance(qualifiers):
return None


def process_query(input_query, input_filename, path_to_grammar=None, cache=True, parallel=False):
def process_query(input_query, input_filename, path_to_grammar=None, cache=True, parallel=False, ms1_df=None, ms2_df=None):
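"""
Parse a query string and evaluate it against the data, returning the results.

Args:
input_query: Query text; it is parsed with msql_parser.parse_msql.
input_filename: Name of the data file to query. It is forwarded to the query evaluation, which loads the data from it when no data frames are passed in.
path_to_grammar (optional): Forwarded to msql_parser.parse_msql. Defaults to None.
cache (bool, optional): Forwarded to msql_fileloading.load_data when the file has to be loaded. Defaults to True.
parallel (bool, optional): Forwarded to the query evaluation. Defaults to False.
ms1_df (optional): MS1 data frame that is already in memory. Defaults to None, in which case both ms1_df and ms2_df are loaded from input_filename.
ms2_df (optional): MS2 data frame that is already in memory; pass it together with ms1_df. Defaults to None.

Returns:
Data frame with the query results.
"""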

parsed_dict = msql_parser.parse_msql(input_query, path_to_grammar=path_to_grammar)

return _evalute_variable_query(parsed_dict, input_filename, cache=cache, parallel=parallel)
return _evalute_variable_query(parsed_dict, input_filename, cache=cache, parallel=parallel, ms1_df=ms1_df, ms2_df=ms2_df)

def _determine_mz_max(mz, ppm_tol, da_tol):
da_tol = da_tol if da_tol < 10000 else 0
Expand All @@ -63,7 +79,7 @@ def _determine_mz_max(mz, ppm_tol, da_tol):

return mz + half_delta

def _evalute_variable_query(parsed_dict, input_filename, cache=True, parallel=False):
def _evalute_variable_query(parsed_dict, input_filename, cache=True, parallel=False, ms1_df=None, ms2_df=None):
# Lets check if there is a variable in here, the only one allowed is X
for condition in parsed_dict["conditions"]:
try:
Expand Down Expand Up @@ -138,7 +154,9 @@ def _evalute_variable_query(parsed_dict, input_filename, cache=True, parallel=Fa
# This is when the target is actually a float
pass

ms1_df, ms2_df = msql_fileloading.load_data(input_filename, cache=cache)
# Loading data if not passed in
if ms1_df is None:
ms1_df, ms2_df = msql_fileloading.load_data(input_filename, cache=cache)

# Here we are going to translate the variable query into a concrete query based upon the data
all_concrete_queries = []
Expand Down
28 changes: 28 additions & 0 deletions massql/msql_fileloading.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ def load_data(input_filename, cache=False):

elif input_filename[-4:].lower() == ".mgf":
ms1_df, ms2_df = _load_data_mgf(input_filename)

elif input_filename[-4:].lower() == ".txt":
ms1_df, ms2_df = _load_data_txt(input_filename)

else:
print("Cannot Load File Extension")
Expand Down Expand Up @@ -475,3 +478,28 @@ def _load_data_mzML(input_filename):

return ms1_df, ms2_df

def _load_data_txt(input_filename):
    """Load a plain-text peak list as a single MS1 spectrum.

    The file is expected to hold whitespace separated columns, one peak per
    line: the first column is the m/z value and the second is the intensity.
    Blank lines are ignored. Every peak is assigned to scan 1 at retention
    time 0 and is marked as positive polarity.

    Args:
        input_filename: Path of the text file to read.

    Returns:
        A tuple ``(ms1_df, ms2_df)``. ``ms1_df`` has the columns ``mz``,
        ``i``, ``i_norm`` (intensity divided by the maximum intensity),
        ``i_tic_norm`` (intensity divided by the summed intensity), ``scan``,
        ``rt`` and ``polarity``. ``ms2_df`` is always an empty data frame.
        A file without any peaks yields an empty ``ms1_df`` that still
        carries all of these columns.

    Raises:
        ValueError: If a non-blank line does not consist of exactly two
            columns, or if a column cannot be converted to a float.
    """
    mz_list = []
    i_list = []

    # The context manager guarantees the handle is closed, even when a
    # malformed line raises part way through the file.
    with open(input_filename) as input_file:
        for line in input_file:
            fields = line.split()
            if not fields:
                # Blank (or whitespace-only) line
                continue

            mz, intensity = fields
            mz_list.append(float(mz))
            i_list.append(float(intensity))

    # An explicit float dtype keeps the columns numeric for an empty file.
    ms1_df = pd.DataFrame({"mz": mz_list, "i": i_list}, dtype=float)

    # Series.max()/Series.sum() are well defined on an empty column, unlike
    # the builtin max(), which raises on an empty sequence.
    ms1_df["i_norm"] = ms1_df["i"] / ms1_df["i"].max()
    ms1_df["i_tic_norm"] = ms1_df["i"] / ms1_df["i"].sum()
    ms1_df["scan"] = 1
    ms1_df["rt"] = 0
    ms1_df["polarity"] = "Positive"

    return ms1_df, pd.DataFrame()
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ f875745343574b1680138b6e12103afd,sanity test,query_results
db21c5c316034b09a74668b53828b39b,Trimethoprem,query_results
3290b45278624b9b877bdee60dfbc7b3,Brominated,query_results
fcb68f55a00c4ea39153266cd2bd42de,Iron OR queries,query_results
0e132d964fef4f1a9f1a80d95b352920,Top Down Test,query_results
2 changes: 0 additions & 2 deletions proteosafe/test-integration/test_tasks.csv

This file was deleted.

58 changes: 58 additions & 0 deletions tests/test_data/top_down.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
132465.09 12.546343
133225.4931 12.19392
133989.6146 18.382821
134744.8918 16.622169
135508.8453 29.15272
135709.4353 12.315019
136269.7202 23.290751
136474.0863 16.300753
136823.8683 13.401818
137025.2584 42.788894
137234.0651 14.446877
137584.0828 13.276068
137785.3407 36.485754
137995.0349 14.735787
138340.1478 20.745298
138549.298 62.669336
138754.2691 23.062884
139105.4871 23.903191
139304.9367 54.209214
139509.8068 24.567254
139864.7173 23.295769
140065.2515 62.811609
140270.0596 30.497242
140623.8754 29.028855
140826.022 61.716949
141035.2999 20.697356
141383.9161 30.244086
141585.5889 76.422158
141794.0949 34.943156
142144.3674 54.074194
142345.3573 100
142553.5582 42.638963
142904.3269 35.756392
143105.3558 66.716483
143310.0193 33.103042
143665.4048 35.090121
143865.6924 70.924197
144073.8344 34.686524
144424.0522 27.382454
144625.9266 69.20136
144833.5925 26.57023
145185.3049 31.549792
145385.8676 84.545703
145589.8953 39.743775
145944.8025 28.254461
146149.9511 71.468122
146355.289 24.295305
146704.5286 19.559481
146905.8959 62.213523
147106.5088 29.916714
147669.7064 43.913333
147873.7765 17.187839
148426.2304 39.17724
148634.4224 20.539458
149190.0231 19.643502
149395.1611 10.928549
149953.621 15.454649
150715.2934 10.336184
12 changes: 9 additions & 3 deletions tests/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,8 +444,14 @@ def test_topdown():
MS2PROD=X+202:TOLERANCEMZ=10:INTENSITYMATCH=Y*0.5:INTENSITYMATCHPERCENT=50 AND \
MS2PROD=X-202:TOLERANCEMZ=10:INTENSITYMATCH=Y*0.5:INTENSITYMATCHPERCENT=50"
results_df = msql_engine.process_query(query, "tests/test_data/top_down.mgf")
assert(len(results_df) == 14)

query = query.replace("MS2DATA", "MS1DATA")
query = query.replace("MS2PROD", "MS1MZ")

results_df = msql_engine.process_query(query, "tests/test_data/top_down.txt")
assert(len(results_df) == 14)

print(results_df)


def test_quad_brominated():
Expand Down Expand Up @@ -593,10 +599,10 @@ def main():
#test_translator()
#test_ms1_iron_X_changes_intensity()
#test_nocache()
#test_topdown()
test_topdown()
#test_defect()
#test_or_against_iron()
test_quad_brominated()
#test_quad_brominated()

if __name__ == "__main__":
main()
Expand Down

0 comments on commit 46a7b05

Please sign in to comment.