-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
changed structure of cache to be accessed as a CLI
- Loading branch information
1 parent
79a3e9a
commit 2e71444
Showing
6 changed files
with
116 additions
and
87 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
"""Packaging script for the bioModels_cache command-line tool."""
from setuptools import setup, find_packages

setup(
    name='bioModels_cache',
    version='0.1.0',
    author='Brigit Parrish',
    author_email='[email protected]',
    description='A CLI tool to cache BioModels for Sys-Bio Projects',
    url='https://github.com/sys-bio/BiomodelsCache',
    # Packages are discovered under src/, so setuptools must also be told
    # that the root package namespace lives there; without package_dir the
    # discovered names cannot be resolved to directories at build time.
    package_dir={'': 'src'},
    packages=find_packages(where='src'),
    install_requires=[
        'biomodels_restful_api_client ==0.1.1',
    ],
    # NOTE(review): no console_scripts are registered yet, so installing the
    # package does not create a command; add one once the module path of
    # main() inside src/ is confirmed.
    entry_points={
    },
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
import json | ||
from biomodels_restful_api_client import services as bmservices | ||
import re | ||
import argparse | ||
|
||
|
||
class BioModelsCache:
    """Collect a reduced record for each BioModel and persist them as JSON.

    Attributes:
        total_models: Maximum number of models to add to the cache in one
            ``cache_biomodels`` run.
        modelResults: Mapping of model id to the cached record, a dict with
            the keys ``name``, ``url`` and ``model_id``.
    """

    # Compiled once at class creation; both patterns are reused per model.
    _TAG_PATTERN = re.compile('<.*?>')
    # Accepts anchors that carry other attributes before or after href,
    # while requiring whitespace before "href" so "data-href" is ignored.
    _ANCHOR_HREF_PATTERN = re.compile(r'<a\s+(?:[^>]*?\s)?href="([^"]*)"')

    def __init__(self, total_models=2000):
        self.total_models = total_models
        self.modelResults = {}

    def remove_html_tags(self, text):
        """
        Removes HTML tags from a string.
        Parameters:
        1. text: A string of text with HTML tags that must be removed.
        Returns:
        str: The input string with all HTML tags removed.
        """
        return self._TAG_PATTERN.sub('', text)

    def extract_urls(self, text):
        """
        Extracts URLs from anchor tags (<a href="...">) in a string.
        Parameters:
        1. text: A string of text with anchor tags. The tags must still be
        present, so pass the raw HTML rather than tag-stripped text.
        Returns:
        list: A list of URLs extracted from anchor tags, in order of
        appearance.
        """
        return self._ANCHOR_HREF_PATTERN.findall(text)

    def update_cache(self, model):
        """
        Update the cache with the model data if it's not already present.
        Parameters:
        1. model: A dictionary representing the model data to be cached. It
        must contain 'publicationId'; 'name' and 'description' are optional.
        Returns:
        bool: Returns True if the cache was updated with the model, False if
        it is not a BioModel or if an identical record is already cached.
        """
        model_id = model['publicationId']
        if not isinstance(model_id, str) or "BIOMD" not in model_id:
            return False

        # URLs live inside the anchor tags, so they must be read from the raw
        # description; stripping the tags first would discard every link.
        description = model.get('description') or ''
        entry = {
            'name': model.get('name', ''),
            'url': self.extract_urls(description),
            'model_id': model_id,
        }

        # Compare like with like: the stored value is the reduced record,
        # not the raw model, so the raw model can never equal it.
        if self.modelResults.get(model_id) == entry:
            return False
        self.modelResults[model_id] = entry
        return True

    def cache_biomodels(self):
        """Fetch and cache information for a set number of BioModels.

        Stops requesting model details as soon as ``total_models`` records
        have been cached, then writes the cache to disk.
        """
        cached_count = 0
        # NOTE(review): assumes the service returns a mapping with a
        # "models" list of identifiers - confirm against the client library.
        identifiers = bmservices.get_model_identifiers()

        for identifier in identifiers["models"]:
            if cached_count >= self.total_models:
                # Limit reached: skip walking the rest of the identifiers.
                break
            info = bmservices.get_model_info(identifier)
            if 'publicationId' in info and self.update_cache(info):
                cached_count += 1

        self.save_to_json()

    def save_to_json(self):
        """Saves the cached biomodels to cached_biomodels.json in the
        current working directory, overwriting any existing file."""
        with open('cached_biomodels.json', 'w') as json_file:
            json.dump(self.modelResults, json_file)
|
||
|
||
def main():
    """Command-line entry point.

    Reads the optional ``--total`` integer (default 2000) and runs a cache
    build limited to that many models.
    """
    cli = argparse.ArgumentParser(description='Cache BioModels data.')
    cli.add_argument(
        '--total',
        type=int,
        default=2000,
        help='Total number of models to cache (default: 2000)',
    )
    requested_total = cli.parse_args().total

    BioModelsCache(total_models=requested_total).cache_biomodels()


# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
{"BIOMD007": {"description": "Description of BioModel 7"}} | ||
{"BIOMD0000000001": {"name": "Edelstein1996 - EPSP ACh event", "url": [], "model_id": "BIOMD0000000001"}, "BIOMD0000000002": {"name": "Edelstein1996 - EPSP ACh species", "url": [], "model_id": "BIOMD0000000002"}, "BIOMD0000000003": {"name": "Goldbeter1991 - Min Mit Oscil", "url": [], "model_id": "BIOMD0000000003"}, "BIOMD0000000004": {"name": "Goldbeter1991 - Min Mit Oscil, Expl Inact", "url": [], "model_id": "BIOMD0000000004"}, "BIOMD0000000005": {"name": "Tyson1991 - Cell Cycle 6 var", "url": [], "model_id": "BIOMD0000000005"}, "BIOMD0000000006": {"name": "Tyson1991 - Cell Cycle 2 var", "url": [], "model_id": "BIOMD0000000006"}, "BIOMD0000000007": {"name": "Novak1997 - Cell Cycle", "url": [], "model_id": "BIOMD0000000007"}} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters