changed structure of cache to be accessed as a CLI

brigit-parrish committed Dec 19, 2023
1 parent 79a3e9a commit 2e71444
Showing 6 changed files with 116 additions and 87 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/run_bioModels_cache.yml
@@ -1,4 +1,4 @@
name: run bioModels_cache.py
name: run BioModelsCache.py

on:
schedule:
@@ -23,4 +23,4 @@ jobs:
pip install -r requirements.txt
- name: execute py script
run: python bioModels_cache.py
run: python BioModelsCache.py
15 changes: 15 additions & 0 deletions setup.py
@@ -0,0 +1,15 @@
from setuptools import setup, find_packages
setup(
name='bioModels_cache',
version='0.1.0',
author='Brigit Parrish',
author_email='[email protected]',
description='A CLI tool to cache BioModels for Sys-Bio Projects',
url='https://github.com/sys-bio/BiomodelsCache',
packages=find_packages(where='src'),
install_requires=[
        'biomodels_restful_api_client==0.1.1',
],
entry_points={
},
)
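
As committed, entry_points is empty, so installing the package would not yet expose a console command. A minimal sketch of how the CLI could be registered, assuming main() in src/BioModelsCache.py is the intended entry point; the command name biomodels-cache and the package_dir/py_modules settings are assumptions, not part of this commit:

# Hypothetical completion of setup.py -- not part of the commit.
from setuptools import setup, find_packages

setup(
    name='bioModels_cache',
    version='0.1.0',
    # (author, description, and url metadata as in the committed setup.py)
    package_dir={'': 'src'},              # assumed: modules live under src/
    packages=find_packages(where='src'),
    py_modules=['BioModelsCache'],        # BioModelsCache.py is a single module, not a package
    install_requires=['biomodels_restful_api_client==0.1.1'],
    entry_points={
        'console_scripts': [
            # assumed command name; maps to main() in src/BioModelsCache.py
            'biomodels-cache=BioModelsCache:main',
        ],
    },
)
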
97 changes: 97 additions & 0 deletions src/BioModelsCache.py
@@ -0,0 +1,97 @@
import json
from biomodels_restful_api_client import services as bmservices
import re
import argparse


class BioModelsCache:
def __init__(self, total_models=2000):
self.total_models = total_models
self.modelResults = {}

def remove_html_tags(self, text):
"""
Removes HTML tags from a string.
Parameters:
1. text: A string of text with HTML tags that must be removed.
Returns:
str: The input string with all HTML tags removed.
"""
clean = re.compile('<.*?>')
return re.sub(clean, '', text)

def extract_urls(self, text):
"""
Extracts URLs from anchor tags (<a href="...">) in a string.
Parameters:
1. text: A string of text with anchor tags.
Returns:
list: A list of URLs extracted from anchor tags.
"""
pattern = re.compile(r'<a href="([^"]*)">')
urls = pattern.findall(text)
return urls


def update_cache(self, model):
"""
Update the cache with the model data if it's not already present.
Parameters:
1. model: A dictionary representing the model data to be cached.
Returns:
        bool: True if the cache was updated with the model; False if it is not a BioModel or if the BioModel
        is already in the cache.
"""
model_id = model['publicationId']
if "BIOMD" not in model_id:
return False
if model_id not in self.modelResults or self.modelResults[model_id] != model:
            # Extract URLs from the raw description before stripping markup;
            # extract_urls matches the <a href="..."> tags that remove_html_tags deletes.
            url = self.extract_urls(model["description"])

self.modelResults[model_id] = {
'name': model.get('name', ''),
'url': url,
'model_id': model_id
}
return True
return False

def cache_biomodels(self):
"""Fetch and cache information for a set number of BioModels."""
i = 0
modelIdentifiers = bmservices.get_model_identifiers()
models = modelIdentifiers["models"]

        for nModel in models:
            # Stop querying the API once the requested number of models is cached.
            if i >= self.total_models:
                break
            result = bmservices.get_model_info(nModel)
            if 'publicationId' in result:
                updated_cache = self.update_cache(result)
                if updated_cache:
                    i += 1

self.save_to_json()

def save_to_json(self):
"""Saves the cached biomodel to the JSON file."""
with open('cached_biomodels.json', 'w') as json_file:
json.dump(self.modelResults, json_file)


def main():
parser = argparse.ArgumentParser(description='Cache BioModels data.')
parser.add_argument('--total', type=int, default=2000,
help='Total number of models to cache (default: 2000)')
args = parser.parse_args()

cache = BioModelsCache(total_models=args.total)
cache.cache_biomodels()

if __name__ == '__main__':
main()
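
A hedged usage sketch of the new module, for context. The sample record reuses names from cached_biomodels.json below, but its description markup is illustrative, and the import assumes src/ is on the path:

# Hypothetical usage sketch -- not part of the commit.
from BioModelsCache import BioModelsCache

cache = BioModelsCache(total_models=5)

# Illustrative model record; real records come from bmservices.get_model_info().
sample = {
    'publicationId': 'BIOMD0000000003',
    'name': 'Goldbeter1991 - Min Mit Oscil',
    'description': 'Details at <a href="https://www.ebi.ac.uk/biomodels/BIOMD0000000003">BioModels</a>.',
}

if cache.update_cache(sample):   # True: new BIOMD entry, so it is cached
    cache.save_to_json()         # writes cached_biomodels.json in the working directory

# Or run the CLI directly, capping the number of cached models:
#   python src/BioModelsCache.py --total 100
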
80 changes: 0 additions & 80 deletions src/biomodels_cache.py

This file was deleted.

2 changes: 1 addition & 1 deletion src/cached_biomodels.json
@@ -1 +1 @@
{"BIOMD007": {"description": "Description of BioModel 7"}}
{"BIOMD0000000001": {"name": "Edelstein1996 - EPSP ACh event", "url": [], "model_id": "BIOMD0000000001"}, "BIOMD0000000002": {"name": "Edelstein1996 - EPSP ACh species", "url": [], "model_id": "BIOMD0000000002"}, "BIOMD0000000003": {"name": "Goldbeter1991 - Min Mit Oscil", "url": [], "model_id": "BIOMD0000000003"}, "BIOMD0000000004": {"name": "Goldbeter1991 - Min Mit Oscil, Expl Inact", "url": [], "model_id": "BIOMD0000000004"}, "BIOMD0000000005": {"name": "Tyson1991 - Cell Cycle 6 var", "url": [], "model_id": "BIOMD0000000005"}, "BIOMD0000000006": {"name": "Tyson1991 - Cell Cycle 2 var", "url": [], "model_id": "BIOMD0000000006"}, "BIOMD0000000007": {"name": "Novak1997 - Cell Cycle", "url": [], "model_id": "BIOMD0000000007"}}
5 changes: 1 addition & 4 deletions src/tests/test_bioModels_cache.py
@@ -1,6 +1,6 @@
import unittest
from unittest.mock import patch, mock_open
from src.bioModels_cache import remove_html_tags, update_cache, save_to_json, cache_biomodels
from src.BioModelsCache import remove_html_tags, update_cache

class TestBioModelsCache(unittest.TestCase):

@@ -19,9 +19,6 @@ def test_update_cache_2(self):
newModel = {'publicationId': 'BIOMD008', 'description': 'description of BIOMD008'}
self.assertTrue(update_cache(model, newModel))





if __name__ == '__main__':
unittest.main()
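
The updated import still pulls remove_html_tags and update_cache in as module-level functions, while the new module defines them as methods on BioModelsCache, so the suite would need adjusting. A hedged sketch of how the tests might be adapted; the test names, sample records, and assertions are illustrative:

# Hypothetical adaptation -- not part of the commit.
import unittest
from src.BioModelsCache import BioModelsCache


class TestBioModelsCache(unittest.TestCase):
    def test_remove_html_tags(self):
        cache = BioModelsCache()
        self.assertEqual(cache.remove_html_tags('<p>hello</p>'), 'hello')

    def test_update_cache_rejects_non_biomodels(self):
        cache = BioModelsCache()
        model = {'publicationId': 'MODEL1234567890', 'description': ''}
        self.assertFalse(cache.update_cache(model))

    def test_update_cache_accepts_new_biomodel(self):
        cache = BioModelsCache()
        model = {'publicationId': 'BIOMD0000000001',
                 'name': 'Edelstein1996 - EPSP ACh event',
                 'description': 'no links here'}
        self.assertTrue(cache.update_cache(model))


if __name__ == '__main__':
    unittest.main()
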
