Skip to content

Commit

Permalink
474 Update model hosting place (#463)
Browse files Browse the repository at this point in the history
Fixes #474 

### Description
This PR updates the model hosting location for bundles. It also
updates the corresponding CI pipeline: Blossom will be used to replace
the GitHub workflow.

### Status
**Ready**

### Please ensure all the checkboxes:
<!--- Put an `x` in all the boxes that apply, and remove the not
applicable items -->
- [x] Codeformat tests passed locally by running `./runtests.sh
--codeformat`.
- [ ] In-line docstrings updated.
- [ ] Update `version` and `changelog` in `metadata.json` if changing an
existing bundle.
- [ ] Please ensure the naming rules in config files meet our
requirements (please refer to: `CONTRIBUTING.md`).
- [ ] Ensure versions of packages such as `monai`, `pytorch` and `numpy`
are correct in `metadata.json`.
- [ ] Descriptions should be consistent with the content, such as
`eval_metrics` of the provided weights and TorchScript modules.
- [ ] Files larger than 25MB are excluded and replaced by providing
download links in `large_file.yml`.
- [ ] Avoid using paths that contain personal information within config
files (such as using `/home/your_name/` for `"bundle_root"`).

---------

Signed-off-by: Yiheng Wang <[email protected]>
  • Loading branch information
yiheng-wang-nv authored Jul 24, 2023
1 parent 9786cd8 commit 50d7614
Show file tree
Hide file tree
Showing 7 changed files with 423 additions and 41 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: update-model-info

on:
schedule:
- cron: "0 10 * * *" # 10:00, everyday
# - cron: "0 10 * * *" # 10:00, everyday
# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:

Expand All @@ -20,7 +20,7 @@ jobs:
python -m pip install -r requirements-dev.txt
changes=$(git diff --name-only HEAD^..HEAD -- models)
if [ ! -z "$changes" ]; then
python $(pwd)/ci/update_model_info.py --f "$changes"
python $(pwd)/ci/update_model_info_deparate.py --f "$changes"
fi
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
Expand Down
27 changes: 19 additions & 8 deletions ci/update_model_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@

from utils import (
compress_bundle,
create_pull_request,
download_large_files,
get_changed_bundle_list,
get_checksum,
get_existing_bundle_list,
get_hash_func,
get_json_dict,
push_new_model_info_branch,
save_model_info,
submit_pull_request,
upload_bundle,
)

Expand Down Expand Up @@ -74,15 +74,25 @@ def update_model_info(
checksum = get_checksum(dst_path=zipfile_path, hash_func=hash_func)

# step 3
# check if uploading a new bundle
model_info_path = os.path.join(models_path, model_info_file)
model_info = get_json_dict(model_info_path)
existing_bundle_list = get_existing_bundle_list(model_info)
exist_flag = False
if bundle_name in existing_bundle_list:
exist_flag = True
try:
source = upload_bundle(bundle_zip_file_path=zipfile_path, bundle_zip_filename=bundle_zip_name)
source = upload_bundle(
bundle_name=bundle_name,
version=latest_version,
root_path=temp_dir,
bundle_zip_name=bundle_zip_name,
exist_flag=exist_flag,
)
except Exception as e:
return (False, f"Upload bundle error: {e}")

# step 4
model_info_path = os.path.join(models_path, model_info_file)
model_info = get_json_dict(model_info_path)

if bundle_name_with_version not in model_info.keys():
model_info[bundle_name_with_version] = {"checksum": "", "source": ""}

Expand All @@ -105,6 +115,7 @@ def main(changed_dirs):
bundle_list = get_changed_bundle_list(changed_dirs)
models_path = "models"
model_info_file = "model_info.json"

if len(bundle_list) > 0:
for bundle in bundle_list:
# create a temporary copy of the bundle for further processing
Expand All @@ -120,8 +131,8 @@ def main(changed_dirs):
raise AssertionError(f"update bundle: {bundle} failed. {msg}")

# push a new branch that contains the updated model_info.json
branch_name = push_new_model_info_branch(model_info_path=os.path.join(models_path, model_info_file))
create_pull_request(branch_name)
submit_pull_request(model_info_path=os.path.join(models_path, model_info_file))
print("a pull request with updated model info is submitted.")
else:
print(f"all changed files: {changed_dirs} are not related to any existing bundles, skip updating.")

Expand Down
134 changes: 134 additions & 0 deletions ci/update_model_info_deparate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
# Copyright (c) MONAI Consortium
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import os
import shutil
import tempfile

from utils_deparate import (
compress_bundle,
create_pull_request,
download_large_files,
get_changed_bundle_list,
get_checksum,
get_hash_func,
get_json_dict,
push_new_model_info_branch,
save_model_info,
upload_bundle,
)


def update_model_info(
    bundle_name: str, temp_dir: str, models_path: str = "models", model_info_file: str = "model_info.json"
):
    """
    Refresh the `model_info_file` entry of a single changed model (bundle).

    The bundle directory is first copied from `models_path` into `temp_dir`, then:
        1. if the copy holds a `large_files` config (.yml, .yaml or .json), the files
           it lists are downloaded into the copy and the config itself is deleted.
        2. the copy is compressed into `<bundle_name>_v<version>.zip`, where the version
           is read from `configs/metadata.json`, and a checksum of the zip is computed
           with the hash function requested as "sha1".
        3. the zip is uploaded.
        4. the checksum and the upload source are stored in `model_info_file` under the
           key `<bundle_name>_v<version>`.

    Args:
        bundle_name: directory name of the bundle inside `models_path`.
        temp_dir: existing directory that receives the working copy and the zip.
        models_path: directory that contains the bundles and `model_info_file`.
        model_info_file: file name of the model info json inside `models_path`.

    Returns:
        a 2-tuple.
        If the update is successful, the form is (True, "update successful").
        If downloading, compressing or uploading raised, the form is (False, "error reason").
    """
    bundle_copy = os.path.join(temp_dir, bundle_name)
    shutil.copytree(os.path.join(models_path, bundle_name), bundle_copy)

    # step 1
    try:
        for suffix in (".yml", ".yaml", ".json"):
            config_name = "large_files" + suffix
            config_path = os.path.join(bundle_copy, config_name)
            if not os.path.exists(config_path):
                continue
            download_large_files(bundle_path=bundle_copy, large_file_name=config_name)
            # the config must not be shipped inside the compressed bundle
            os.remove(config_path)
    except Exception as e:
        return (False, f"Download large files error: {e}")

    # step 2
    metadata = get_json_dict(os.path.join(bundle_copy, "configs/metadata.json"))
    latest_version = metadata["version"]
    bundle_name_with_version = f"{bundle_name}_v{latest_version}"
    bundle_zip_name = f"{bundle_name}_v{latest_version}.zip"
    try:
        compress_bundle(root_path=temp_dir, bundle_name=bundle_name, bundle_zip_name=bundle_zip_name)
    except Exception as e:
        return (False, f"Compress bundle error: {e}")

    zipfile_path = os.path.join(temp_dir, bundle_zip_name)
    checksum = get_checksum(dst_path=zipfile_path, hash_func=get_hash_func(hash_type="sha1"))

    # step 3
    try:
        source = upload_bundle(bundle_zip_file_path=zipfile_path, bundle_zip_filename=bundle_zip_name)
    except Exception as e:
        return (False, f"Upload bundle error: {e}")

    # step 4
    model_info_path = os.path.join(models_path, model_info_file)
    model_info = get_json_dict(model_info_path)

    # a version seen for the first time gets a fresh record before it is filled in
    if bundle_name_with_version not in model_info.keys():
        model_info[bundle_name_with_version] = {"checksum": "", "source": ""}
    record = model_info[bundle_name_with_version]
    record["checksum"] = checksum
    record["source"] = source

    save_model_info(model_info, model_info_path)
    return (True, "update successful")


def main(changed_dirs):
    """
    main function to process all changed files. It will do the following steps:
        1. according to changed directories, get changed bundles.
        2. update each bundle in its own temporary working directory.
        3. if at least one bundle was changed, push the updated model_info_file
           on a new branch and create a pull request from it.

    Args:
        changed_dirs: list of changed paths, passed to `get_changed_bundle_list`.

    Raises:
        AssertionError: updating one of the bundles reported a failure.
    """
    bundle_list = get_changed_bundle_list(changed_dirs)
    models_path = "models"
    model_info_file = "model_info.json"

    if not bundle_list:
        print(f"all changed files: {changed_dirs} are not related to any existing bundles, skip updating.")
        return

    for bundle in bundle_list:
        # create a temporary copy of the bundle for further processing; the context
        # manager removes the directory even when the update raises midway
        with tempfile.TemporaryDirectory() as temp_dir:
            update_state, msg = update_model_info(
                bundle_name=bundle, temp_dir=temp_dir, models_path=models_path, model_info_file=model_info_file
            )

        if update_state is True:
            print(f"update bundle: {bundle} successful.")
        else:
            raise AssertionError(f"update bundle: {bundle} failed. {msg}")

    # push a new branch that contains the updated model_info.json
    branch_name = push_new_model_info_branch(model_info_path=os.path.join(models_path, model_info_file))
    create_pull_request(branch_name)


if __name__ == "__main__":
    # Script entry point: the changed files arrive as one newline-separated string.
    parser = argparse.ArgumentParser(description="")
    # required: without it args.f is None and .splitlines() below fails with AttributeError
    parser.add_argument("-f", "--f", type=str, required=True, help="changed files.")
    args = parser.parse_args()
    changed_dirs = args.f.splitlines()
    main(changed_dirs)
123 changes: 93 additions & 30 deletions ci/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@
import hashlib
import json
import os
import re
import shutil
import subprocess
from typing import List

from github import Github
from monai.apps.utils import download_url
from monai.bundle.config_parser import ConfigParser
from monai.utils import look_up_option
Expand Down Expand Up @@ -120,27 +122,32 @@ def get_latest_version(bundle_name: str, model_info_path: str):
return sorted(versions)[-1]


def push_new_model_info_branch(model_info_path: str):
email = os.environ["email"]
username = os.environ["username"]

def submit_pull_request(model_info_path: str):
# set required info for a pull request
branch_name = "auto-update-model-info"
create_push_cmd = f"git checkout -b {branch_name}; git push --set-upstream origin {branch_name}"

git_config = f"git config user.email {email}; git config user.name {username}"
commit_message = "git commit -m 'auto update model_info'"
full_cmd = f"{git_config}; git add {model_info_path}; {commit_message}; {create_push_cmd}"

call_status = subprocess.run(full_cmd, shell=True)
call_status.check_returncode()

return branch_name


def create_pull_request(branch_name: str, pr_title: str = "'auto update model_info [skip ci]'"):
create_command = f"gh pr create --fill --title {pr_title} --base dev --head {branch_name}"
call_status = subprocess.run(create_command, shell=True)
call_status.check_returncode()
pr_title = "auto update model_info [skip ci]"
pr_description = "This PR is automatically created to update model_info.json"
commit_message = "auto update model_info"
repo_file_path = "models/model_info.json"
# authenticate with Github CLI
github_token = os.environ["GITHUB_TOKEN"]
repo_name = "Project-MONAI/model-zoo"
g = Github(github_token)
# create new branch
repo = g.get_repo(repo_name)
default_branch = repo.default_branch
new_branch = repo.create_git_ref(ref=f"refs/heads/{branch_name}", sha=repo.get_branch(default_branch).commit.sha)
# push changes
model_info = get_json_dict(model_info_path)
repo.update_file(
path=repo_file_path,
message=commit_message,
content=json.dumps(model_info),
sha=repo.get_contents(repo_file_path, ref=default_branch).sha,
branch=new_branch.ref,
)
# create PR
repo.create_pull(title=pr_title, body=pr_description, head=new_branch.ref, base=default_branch)


def compress_bundle(root_path: str, bundle_name: str, bundle_zip_name: str):
Expand All @@ -156,15 +163,71 @@ def get_checksum(dst_path: str, hash_func):
return hash_func.hexdigest()


def split_bundle_name_version(bundle_name: str):
    """
    Split a versioned bundle name of the form `<name>_v<version>` into its parts.

    The version part must start with a digit. When the name contains several
    `_v<digit>` candidates, the last one is used as the separator.

    Args:
        bundle_name: versioned bundle name, for example `spleen_ct_segmentation_v0.1.0`.

    Returns:
        a 2-tuple (name, version), both strings.

    Raises:
        ValueError: `bundle_name` does not follow the `<name>_v<version>` format.
    """
    matched = re.match(r"^(.+)\_v(\d.*)$", bundle_name)
    if matched is None:
        raise ValueError(f"{bundle_name} does not meet the naming format.")
    name_part, version_part = matched.groups()
    return name_part, version_part


def get_existing_bundle_list(model_info):
    """
    Collect the distinct bundle names found among the keys of `model_info`.

    Every key is split with `split_bundle_name_version` and only the name part is
    kept, so several versions of one bundle yield a single entry.

    Args:
        model_info: mapping whose keys are versioned bundle names.

    Returns:
        list of bundle names without duplicates, in order of first appearance.
    """
    # dict keys keep insertion order, so this de-duplicates without reordering
    unique_names = dict.fromkeys(split_bundle_name_version(key)[0] for key in model_info.keys())
    return list(unique_names)


def create_bundle_to_ngc(bundle_name: str, org_name: str):
    """
    Create a model entry for `bundle_name` under `org_name` by calling the `ngc` CLI.

    If the command fails and its stderr contains "already exists", a message is
    printed and the failure is ignored; any other failure is re-raised.

    Args:
        bundle_name: name of the bundle; it is lower-cased to build the model name.
        org_name: prefix placed in front of the model name in the command target.

    Raises:
        subprocess.CalledProcessError: the command exited with a non-zero status
            for a reason other than "already exists".
    """
    # models in NGC need to be lowercase
    target = f"{org_name}/{bundle_name.lower()}"
    # an argument list (no shell) passes the names through verbatim, so spaces or
    # shell metacharacters in them can neither break nor extend the command
    ngc_create_cmd = [
        "ngc",
        "registry",
        "model",
        "create",
        target,
        "--short-desc",
        "",
        "--application",
        "",
        "--format",
        "",
        "--framework",
        "MONAI",
        "--precision",
        "",
    ]
    try:
        subprocess.run(ngc_create_cmd, check=True, stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as e:
        msg = e.stderr.decode("utf-8")
        if "already exists" not in msg:
            raise
        print(f"{bundle_name} already exists, skip creating.")


def upload_version_to_ngc(bundle_name: str, version: str, root_path: str, org_name: str):
    """
    Upload `<bundle_name>_v<version>.zip` as a new model version by calling the `ngc` CLI.

    The command runs with `root_path` as working directory, so the zip file is
    referenced by its bare file name. If the command fails and its stderr contains
    "already exists", a message is printed and the failure is ignored; any other
    failure is re-raised.

    Args:
        bundle_name: name of the bundle; it is lower-cased to build the model name.
        version: version string used in both the zip file name and the target.
        root_path: working directory for the command.
        org_name: prefix placed in front of the model name in the command target.

    Raises:
        subprocess.CalledProcessError: the command exited with a non-zero status
            for a reason other than "already exists".
    """
    upload_file = f"{bundle_name}_v{version}.zip"
    # an argument list (no shell) passes the names through verbatim, so spaces or
    # shell metacharacters in them can neither break nor extend the command
    ngc_upload_cmd = [
        "ngc",
        "registry",
        "model",
        "upload-version",
        "--source",
        upload_file,
        f"{org_name}/{bundle_name.lower()}:{version}",
    ]

    try:
        subprocess.run(ngc_upload_cmd, cwd=root_path, check=True, stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as e:
        msg = e.stderr.decode("utf-8")
        if "already exists" not in msg:
            raise
        print(f"{bundle_name} with version {version} already exists, skip uploading.")


def upload_bundle(
bundle_zip_file_path: str,
bundle_zip_filename: str,
release_tag: str = "hosting_storage_v1",
repo_name: str = "Project-MONAI/model-zoo",
bundle_name: str,
version: str,
root_path: str,
bundle_zip_name: str,
exist_flag: bool,
org_name: str = "nvidia/monaihosting",
):
upload_command = f"gh release upload {release_tag} {bundle_zip_file_path} -R {repo_name}"
call_status = subprocess.run(upload_command, shell=True)
call_status.check_returncode()
source = f"https://github.com/{repo_name}/releases/download/{release_tag}/{bundle_zip_filename}"

return source
if exist_flag is False:
# need to create bundle first
create_bundle_to_ngc(bundle_name=bundle_name, org_name=org_name)
# upload version
upload_version_to_ngc(bundle_name=bundle_name, version=version, root_path=root_path, org_name=org_name)
# access link
site = "https://api.ngc.nvidia.com/v2/models/"
access_link = f"{site}{org_name}/{bundle_name.lower()}/versions/{version}/files/{bundle_zip_name}"

return access_link
Loading

0 comments on commit 50d7614

Please sign in to comment.