changed structure of cache to be accessed as a CLI

brigit-parrish committed Dec 19, 2023
1 parent 79a3e9a commit 2e71444
Showing 6 changed files with 116 additions and 87 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/run_bioModels_cache.yml
@@ -1,4 +1,4 @@
name: run bioModels_cache.py
name: run BioModelsCache.py

on:
schedule:
@@ -23,4 +23,4 @@ jobs:
pip install -r requirements.txt
- name: execute py script
run: python bioModels_cache.py
run: python BioModelsCache.py
15 changes: 15 additions & 0 deletions setup.py
@@ -0,0 +1,15 @@
from setuptools import setup, find_packages
setup(
name='bioModels_cache',
version='0.1.0',
author='Brigit Parrish',
author_email='[email protected]',
description='A CLI tool to cache BioModels for Sys-Bio Projects',
url='https://github.com/sys-bio/BiomodelsCache',
packages=find_packages(where='src'),
install_requires=[
        'biomodels_restful_api_client==0.1.1',
],
entry_points={
},
)
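
As committed, entry_points is empty, so installing the package would not yet expose a console command. A minimal sketch of how the CLI could be registered, assuming main() in src/BioModelsCache.py is the intended entry point; the command name biomodels-cache and the package_dir/py_modules settings are assumptions, not part of this commit:

# Hypothetical completion of setup.py -- not part of the commit.
from setuptools import setup, find_packages

setup(
    name='bioModels_cache',
    version='0.1.0',
    # (author, description, and url metadata as in the committed setup.py)
    package_dir={'': 'src'},              # assumed: modules live under src/
    packages=find_packages(where='src'),
    py_modules=['BioModelsCache'],        # BioModelsCache.py is a single module, not a package
    install_requires=['biomodels_restful_api_client==0.1.1'],
    entry_points={
        'console_scripts': [
            # assumed command name; maps to main() in src/BioModelsCache.py
            'biomodels-cache=BioModelsCache:main',
        ],
    },
)
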
97 changes: 97 additions & 0 deletions src/BioModelsCache.py
@@ -0,0 +1,97 @@
import json
from biomodels_restful_api_client import services as bmservices
import re
import argparse


class BioModelsCache:
def __init__(self, total_models=2000):
self.total_models = total_models
self.modelResults = {}

def remove_html_tags(self, text):
"""
Removes HTML tags from a string.
Parameters:
1. text: A string of text with HTML tags that must be removed.
Returns:
str: The input string with all HTML tags removed.
"""
clean = re.compile('<.*?>')
return re.sub(clean, '', text)

def extract_urls(self, text):
"""
Extracts URLs from anchor tags (<a href="...">) in a string.
Parameters:
1. text: A string of text with anchor tags.
Returns:
list: A list of URLs extracted from anchor tags.
"""
pattern = re.compile(r'<a href="([^"]*)">')
urls = pattern.findall(text)
return urls


def update_cache(self, model):
"""
Update the cache with the model data if it's not already present.
Parameters:
1. model: A dictionary representing the model data to be cached.
Returns:
        bool: True if the cache was updated with the model; False if it is not a BioModel or if the BioModel
        is already in the cache.
"""
model_id = model['publicationId']
if "BIOMD" not in model_id:
return False
if model_id not in self.modelResults or self.modelResults[model_id] != model:
            # Extract URLs from the raw description before stripping markup;
            # extract_urls matches the <a href="..."> tags that remove_html_tags deletes.
            url = self.extract_urls(model["description"])

self.modelResults[model_id] = {
'name': model.get('name', ''),
'url': url,
'model_id': model_id
}
return True
return False

def cache_biomodels(self):
"""Fetch and cache information for a set number of BioModels."""
i = 0
modelIdentifiers = bmservices.get_model_identifiers()
models = modelIdentifiers["models"]

        for nModel in models:
            # Stop querying the API once the requested number of models is cached.
            if i >= self.total_models:
                break
            result = bmservices.get_model_info(nModel)
            if 'publicationId' in result:
                updated_cache = self.update_cache(result)
                if updated_cache:
                    i += 1

self.save_to_json()

def save_to_json(self):
"""Saves the cached biomodel to the JSON file."""
with open('cached_biomodels.json', 'w') as json_file:
json.dump(self.modelResults, json_file)


def main():
parser = argparse.ArgumentParser(description='Cache BioModels data.')
parser.add_argument('--total', type=int, default=2000,
help='Total number of models to cache (default: 2000)')
args = parser.parse_args()

cache = BioModelsCache(total_models=args.total)
cache.cache_biomodels()

if __name__ == '__main__':
main()
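
A hedged usage sketch of the new module, for context. The sample record reuses names from cached_biomodels.json below, but its description markup is illustrative, and the import assumes src/ is on the path:

# Hypothetical usage sketch -- not part of the commit.
from BioModelsCache import BioModelsCache

cache = BioModelsCache(total_models=5)

# Illustrative model record; real records come from bmservices.get_model_info().
sample = {
    'publicationId': 'BIOMD0000000003',
    'name': 'Goldbeter1991 - Min Mit Oscil',
    'description': 'Details at <a href="https://www.ebi.ac.uk/biomodels/BIOMD0000000003">BioModels</a>.',
}

if cache.update_cache(sample):   # True: new BIOMD entry, so it is cached
    cache.save_to_json()         # writes cached_biomodels.json in the working directory

# Or run the CLI directly, capping the number of cached models:
#   python src/BioModelsCache.py --total 100
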
80 changes: 0 additions & 80 deletions src/biomodels_cache.py

This file was deleted.

2 changes: 1 addition & 1 deletion src/cached_biomodels.json
@@ -1 +1 @@
{"BIOMD007": {"description": "Description of BioModel 7"}}
{"BIOMD0000000001": {"name": "Edelstein1996 - EPSP ACh event", "url": [], "model_id": "BIOMD0000000001"}, "BIOMD0000000002": {"name": "Edelstein1996 - EPSP ACh species", "url": [], "model_id": "BIOMD0000000002"}, "BIOMD0000000003": {"name": "Goldbeter1991 - Min Mit Oscil", "url": [], "model_id": "BIOMD0000000003"}, "BIOMD0000000004": {"name": "Goldbeter1991 - Min Mit Oscil, Expl Inact", "url": [], "model_id": "BIOMD0000000004"}, "BIOMD0000000005": {"name": "Tyson1991 - Cell Cycle 6 var", "url": [], "model_id": "BIOMD0000000005"}, "BIOMD0000000006": {"name": "Tyson1991 - Cell Cycle 2 var", "url": [], "model_id": "BIOMD0000000006"}, "BIOMD0000000007": {"name": "Novak1997 - Cell Cycle", "url": [], "model_id": "BIOMD0000000007"}}
5 changes: 1 addition & 4 deletions src/tests/test_bioModels_cache.py
@@ -1,6 +1,6 @@
import unittest
from unittest.mock import patch, mock_open
from src.bioModels_cache import remove_html_tags, update_cache, save_to_json, cache_biomodels
from src.BioModelsCache import remove_html_tags, update_cache

class TestBioModelsCache(unittest.TestCase):

@@ -19,9 +19,6 @@ def test_update_cache_2(self):
newModel = {'publicationId': 'BIOMD008', 'description': 'description of BIOMD008'}
self.assertTrue(update_cache(model, newModel))





if __name__ == '__main__':
unittest.main()
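
The updated import still pulls remove_html_tags and update_cache in as module-level functions, while the new module defines them as methods on BioModelsCache, so the suite would need adjusting. A hedged sketch of how the tests might be adapted; the test names, sample records, and assertions are illustrative:

# Hypothetical adaptation -- not part of the commit.
import unittest
from src.BioModelsCache import BioModelsCache


class TestBioModelsCache(unittest.TestCase):
    def test_remove_html_tags(self):
        cache = BioModelsCache()
        self.assertEqual(cache.remove_html_tags('<p>hello</p>'), 'hello')

    def test_update_cache_rejects_non_biomodels(self):
        cache = BioModelsCache()
        model = {'publicationId': 'MODEL1234567890', 'description': ''}
        self.assertFalse(cache.update_cache(model))

    def test_update_cache_accepts_new_biomodel(self):
        cache = BioModelsCache()
        model = {'publicationId': 'BIOMD0000000001',
                 'name': 'Edelstein1996 - EPSP ACh event',
                 'description': 'no links here'}
        self.assertTrue(cache.update_cache(model))


if __name__ == '__main__':
    unittest.main()
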
