# zoomaker.py -- forked from hfg-gmuend/zoomaker
import os
import subprocess
import yaml
import argparse
from huggingface_hub import hf_hub_download
import git
import requests
from tqdm import tqdm
import unicodedata
import re
class Zoomaker:
    """Install the resources and run the scripts described in a zoo YAML file.

    The YAML document must contain a ``name`` and a ``resources`` mapping of
    group name -> list of resource entries. Every entry needs ``name``,
    ``src``, ``type`` (``huggingface``, ``git`` or ``download``) and
    ``install_to``; ``revision`` and ``rename_to`` are optional. An optional
    ``scripts`` mapping holds named shell commands for :meth:`run`.
    """

    _REQUIRED_RESOURCE_KEYS = ("name", "src", "type", "install_to")
    _RESOURCE_TYPES = ("huggingface", "git", "download")

    def __init__(self, yaml_file: str):
        """Load and validate ``yaml_file``.

        Raises:
            Exception: if the document fails the checks in ``_check_yaml``.
        """
        self.yaml_file = yaml_file
        # Explicit UTF-8: the platform default encoding (e.g. cp1252 on
        # Windows) cannot decode non-ASCII characters in the YAML file.
        with open(yaml_file, "r", encoding="utf-8") as f:
            self.data = yaml.safe_load(f)
        self._check_yaml()

    def _check_yaml(self) -> None:
        """Validate the structure of ``self.data``.

        Raises:
            Exception: if ``name`` or ``resources`` is missing, if the
                document/groups/resources have the wrong shape, if a resource
                lacks a required attribute, or if its ``type`` is unknown.
        """
        data = self.data
        # An empty YAML file loads as None; report it as a missing 'name'
        # instead of crashing with a TypeError on the membership test.
        if not isinstance(data, dict) or "name" not in data:
            raise Exception("❌ 'name' is missing in zoo.yaml")
        if "resources" not in data:
            raise Exception("❌ 'resources' is missing in zoo.yaml")
        groups = data["resources"]
        if not isinstance(groups, dict):
            raise Exception("❌ 'resources' must be a mapping of group names to resource lists")
        for group, resources in groups.items():
            if not isinstance(resources, list):
                raise Exception(f"❌ Resource group '{group}' must be a list of resources")
            for resource in resources:
                if not isinstance(resource, dict):
                    raise Exception(f"❌ Resource in group '{group}' must be a mapping")
                for key in self._REQUIRED_RESOURCE_KEYS:
                    if key not in resource:
                        raise Exception(f"❌ Resource must have '{key}' attribute")
                resource_type = resource["type"]
                if resource_type not in self._RESOURCE_TYPES:
                    raise Exception(f"❌ Unknown resource type: {resource_type}")

    def install(self, no_symlinks: bool = False) -> None:
        """Install every resource of every group, printing progress.

        Args:
            no_symlinks: when True, Hugging Face downloads are requested
                without symlinks (``local_dir_use_symlinks=False``).
        """
        print(f"👋 ===> {self.yaml_file} <===")
        print(f"name: {self.data.get('name', 'N/A')}")
        print(f"version: {self.data.get('version', 'N/A')}\n")
        if no_symlinks:
            print("⛔️ installing resources without symlinks ...")
        print("👇 installing resources ...")
        counter = 0
        for group, resources in self.data["resources"].items():
            print(f"\n{group}:")
            for resource in resources:
                name = resource["name"]
                src = resource["src"]
                resource_type = resource["type"]
                revision = resource.get("revision", None)
                rename_to = resource.get("rename_to", None)
                install_to = os.path.abspath(resource["install_to"])
                counter += 1
                print(f"\t{counter}. {name} to {install_to}")
                os.makedirs(install_to, exist_ok=True)
                if resource_type == "huggingface":
                    self._install_huggingface(src, install_to, revision, rename_to, no_symlinks)
                elif resource_type == "git":
                    self._install_git(src, install_to, revision, rename_to)
                else:
                    self._install_download(src, install_to, revision, rename_to)
        print(f"\n✅ {counter} resources installed.")

    def _install_huggingface(self, src, install_to, revision, rename_to, no_symlinks):
        """Download one file from the Hugging Face Hub into ``install_to``."""
        # src has the form "<owner>/<repo>/<path inside the repo>".
        parts = src.split("/")
        repo_id = "/".join(parts[0:2])
        repo_filepath = "/".join(parts[2:])
        downloaded = hf_hub_download(
            repo_id=repo_id,
            filename=repo_filepath,
            local_dir=install_to,
            revision=revision,
            local_dir_use_symlinks=False if no_symlinks else "auto",
        )
        if rename_to:
            self._rename_file(downloaded, os.path.join(install_to, rename_to))

    def _install_git(self, src, install_to, revision, rename_to):
        """Clone ``src`` below ``install_to`` or update an existing clone."""
        repo_path = os.path.join(install_to, self._get_repo_name(src))
        if rename_to:
            print(f"\trename_to is not supported for git repos. Ignoring rename_to: {rename_to}")
        if os.path.exists(repo_path):
            repo = git.Repo(repo_path)
            pull_label = "git pull"
        else:
            # NOTE(review): unsafe protocols/options are enabled for the clone,
            # so `src` must come from a trusted zoo.yaml.
            repo = git.Repo.clone_from(src, repo_path, allow_unsafe_protocols=True, allow_unsafe_options=True)
            pull_label = "git pull latest"
        if revision:
            repo.git.checkout(revision)
            print(f"\tgit checkout revision: {repo.head.object.hexsha}")
        else:
            repo.remotes.origin.pull()
            print(f"\t{pull_label}: {repo.head.object.hexsha}")

    def _install_download(self, src, install_to, revision, rename_to):
        """Download ``src`` over HTTP into ``install_to``."""
        filename = self._slugify(os.path.basename(src))
        downloaded = self._download_file(src, os.path.join(install_to, filename))
        # _download_file returns None for an incomplete download; skip the
        # rename then instead of passing None to os.replace.
        if rename_to and downloaded:
            self._rename_file(downloaded, os.path.join(install_to, rename_to))
        if revision:
            print(f"\trevision is not supported for download. Ignoring revision: {revision}")

    def run(self, script_name: str) -> None:
        """Run the named script from the ``scripts`` section through the shell.

        If the name is unknown, print the available scripts and return.

        Raises:
            subprocess.CalledProcessError: if the script exits non-zero.
        """
        # 'scripts' is optional (it is not enforced by _check_yaml), so a
        # missing or empty section must not raise KeyError.
        scripts = self.data.get("scripts") or {}
        if script_name not in scripts:
            print(f"No script found with name: '{script_name}'")
            if scripts:
                print("\nAvailable scripts:")
                for available in scripts:
                    print(f"zoomaker run {available}")
            return
        # Scripts are shell command lines taken from the zoo YAML file, hence
        # shell=True.
        subprocess.check_call(scripts[script_name], shell=True)

    def _get_repo_name(self, src: str) -> str:
        """Return the directory name for a git URL or path.

        This is the last path component of ``src`` without a trailing
        ``.git`` suffix.
        """
        # Ignore trailing slashes so ".../repo/" still yields "repo".
        name = os.path.basename(src.rstrip("/"))
        # Strip only the suffix: str.replace would also remove ".git" from the
        # middle of names such as "foo.github.io.git".
        if name.endswith(".git"):
            name = name[: -len(".git")]
        return name

    def _rename_file(self, src, dest) -> None:
        """Move ``src`` to ``dest``, overwriting an existing ``dest``."""
        same_path = (
            os.path.normcase(os.path.abspath(src))
            == os.path.normcase(os.path.abspath(dest))
        )
        if same_path:
            # Nothing to do; removing dest first would delete the file itself.
            return
        # os.replace overwrites an existing destination on every platform,
        # unlike os.rename on Windows.
        os.replace(src, dest)

    def _download_file(self, url, filename):
        """Stream ``url`` into ``filename`` while showing a progress bar.

        Returns:
            ``filename`` on success, or ``None`` when fewer/more bytes than the
            announced ``content-length`` were received.

        Raises:
            requests.HTTPError: if the server answers with an error status.
        """
        block_size = 64 * 1024
        # (connect, read) timeouts so a stalled server cannot hang the install.
        with requests.get(url, stream=True, timeout=(10, 60)) as response:
            # Fail loudly rather than saving an HTTP error page as the resource.
            response.raise_for_status()
            total_size_in_bytes = int(response.headers.get('content-length', 0))
            with open(filename, 'wb') as file, tqdm(
                desc="\tdownloading",
                total=total_size_in_bytes,
                unit='iB',
                unit_scale=True,
                ncols=100,
            ) as progress_bar:
                for data in response.iter_content(block_size):
                    progress_bar.update(len(data))
                    file.write(data)
                received = progress_bar.n
        if total_size_in_bytes != 0 and received != total_size_in_bytes:
            print("Error: Failed to download the complete file.")
            return None
        return filename

    def _slugify(self, value, allow_unicode=False) -> str:
        """
        Makes a filename safe for usage on all filesystems.

        Taken from https://github.com/django/django/blob/master/django/utils/text.py
        Convert to ASCII if 'allow_unicode' is False. Convert spaces or repeated
        dashes to single dashes. Remove characters that aren't alphanumerics,
        underscores, or hyphens (this includes dots). Convert to lowercase.
        Also strip leading and trailing whitespace, dashes, and underscores.
        """
        value = str(value)
        if allow_unicode:
            value = unicodedata.normalize('NFKC', value)
        else:
            value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii')
        value = re.sub(r'[^\w\s-]', '', value.lower())
        return re.sub(r'[-\s]+', '-', value).strip('-_')
def main():
    """Command-line entry point: parse the arguments and dispatch to Zoomaker.

    ``install`` installs the resources of ``zoo.yaml`` and ``run`` executes a
    named script from it; with no command the usage help is printed.
    """
    cli = argparse.ArgumentParser(
        description="Install models, git repos and run scripts defined in the zoo.yaml file.",
    )
    cli.add_argument(
        "command",
        nargs="?",
        choices=["install", "run"],
        help="The command to execute.",
    )
    cli.add_argument("script", nargs="?", help="The script name to execute.")
    cli.add_argument(
        "--no-symlinks",
        action='store_true',
        help="Do not create symlinks for the installed resources.",
    )
    cli.add_argument(
        "-v",
        "--version",
        action='version',
        help="The current version of the zoomaker.",
        version="0.7.0",
    )
    options = cli.parse_args()

    command = options.command
    if command == "install":
        Zoomaker("zoo.yaml").install(options.no_symlinks)
        return
    if command == "run":
        Zoomaker("zoo.yaml").run(options.script)
        return
    # No command given: show the usage help.
    cli.print_help()


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()