-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathvmextract.py
326 lines (245 loc) · 10.2 KB
/
vmextract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
import argparse
import glob
import json
import lxml.etree as et
import logging
import re
import subprocess
import tarfile
import os
# Namespace prefixes used in this module, mapped to their namespace URIs.
# prefix_ns() looks prefixes up here to build fully qualified tag names.
XML_NAMESPACES = {
    # OVF envelope and oVirt
    "ovf": "http://schemas.dmtf.org/ovf/envelope/1",
    "ovirt": "http://www.ovirt.org/ovf",
    # CIM resource allocation / virtual system setting data
    "rasd": "http://schemas.dmtf.org/wbem/wscim/1/cim-schema/2/CIM_ResourceAllocationSettingData",
    "vssd": "http://schemas.dmtf.org/wbem/wscim/1/cim-schema/2/CIM_VirtualSystemSettingData",
    # XML Schema instance
    "xsi": "http://www.w3.org/2001/XMLSchema-instance",
    # Citrix (Xen) OVF envelope
    "xenovf": "http://schemas.citrix.com/ovf/envelope/1",
}
def prefix_ns(ns, val):
    """Qualify the local name *val* with the namespace registered as *ns*.

    The result has the ``{namespace-uri}local-name`` form, which is what the
    handler tables in this module use as keys for element tags and attributes.

    Raises:
        KeyError: if *ns* is not a key of ``XML_NAMESPACES``.
    """
    namespace_uri = XML_NAMESPACES[ns]
    return "{{{0}}}{1}".format(namespace_uri, val)
def handle_elem(elem, handlers, mapper=None):
    """Dispatch *elem* to the handler registered for its key.

    Args:
        elem: The element to process.
        handlers: Mapping from key to a callable taking the element as its
            only argument.
        mapper: Optional callable that derives the lookup key from the
            element. When omitted, the element's ``tag`` is the key.

    Elements whose key has no handler are logged as a warning and skipped;
    the handler's return value is discarded, so this always returns None.
    """
    key = elem.tag if mapper is None else mapper(elem)

    if key not in handlers:
        # logging.warn() was a deprecated alias and no longer exists in
        # Python 3.13, so the supported spelling is used here.
        logging.warning("Unknown tag, skipping: %s (%s)", key, elem.tag)
        return

    handlers[key](elem)
def noop_handler(elem):
    """Accept *elem* and deliberately do nothing with it.

    Registered in handler tables for elements that are expected in the
    document but carry nothing this module needs.
    """
    return None
class ResourceType(object):
    """Integer codes compared against the ``rasd:ResourceType`` of an item.

    The hardware reader converts the text of an item's ``rasd:ResourceType``
    child to ``int`` and uses it to pick a handler keyed by these constants.
    """

    OTHER = 0

    # Compute resources
    CPU = 3
    MEMORY = 4

    # Network adapters
    ETHERNET = 10
    NET_OTHER = 11

    # Drives and storage
    FLOPPY_DRIVE = 14
    CD_DRIVE = 15
    DVD_DRIVE = 16
    DISK_DRIVE = 17
    STORAGE_EXTENT = 19
class VM(object):
    """Plain record of the virtual machine properties read from an OVF."""

    def __init__(self):
        # Identity
        self.id = None
        self.name = None
        # CPU and memory; cores_pre_socket is the only field with a
        # non-None default.
        self.cpu_count = None
        self.cores_pre_socket = 1
        self.memory_bytes = None
        # One dict per disk, appended by the reader.
        self.disks = []

    def to_dict(self):
        """Return the VM's fields as a dict keyed by attribute name.

        The ``disks`` entry is the VM's own list, not a copy.
        """
        field_names = (
            'id',
            'name',
            'cpu_count',
            'cores_pre_socket',
            'memory_bytes',
            'disks',
        )
        return {field: getattr(self, field) for field in field_names}
class OvfReader(object):
    """Populate a ``VM`` from the element tree of a Xen OVF document.

    Each section of the document is dispatched through ``handle_elem`` to a
    reader method; sections that carry nothing needed here are routed to
    ``noop_handler``. Unknown elements are logged and skipped by
    ``handle_elem``.
    """

    def __init__(self):
        self._vm = VM()
        # Root element of the document; kept so disk items can be resolved
        # against the DiskSection and References sections.
        self._ovf = None

    def read_xen_ovf(self, ovf_root):
        """Read the OVF rooted at *ovf_root* and return the resulting VM.

        Args:
            ovf_root: Root (envelope) element of the parsed OVF.

        Returns:
            The populated ``VM`` instance owned by this reader.

        Raises:
            RuntimeError: if a required field is missing, a CPU or memory
                item occurs more than once, or memory is not given in MB.
        """
        self._ovf = ovf_root
        self._read_ovf_envelope(ovf_root)
        self._check_required_fields()
        return self._vm

    @staticmethod
    def _xpath_namespaces(elem):
        """Return the prefix -> URI mapping for XPath queries on *elem*.

        Prefixes declared in the document take precedence, so documents that
        were readable before resolve exactly as they did. ``XML_NAMESPACES``
        fills in prefixes the document does not declare. The default
        namespace (``None`` prefix) is left out because XPath has no notion
        of one and lxml has historically rejected such a mapping.
        """
        namespaces = dict(XML_NAMESPACES)
        namespaces.update(
            (prefix, uri) for prefix, uri in elem.nsmap.items() if prefix
        )
        return namespaces

    def _xpath_first(self, context, expression, ns_elem=None, **variables):
        """Evaluate *expression* on *context* and return the first result.

        Args:
            context: Element the expression is evaluated against.
            expression: XPath expression; may reference ``$name`` variables.
            ns_elem: Element whose namespace declarations should be used;
                defaults to *context*.
            **variables: Values bound to the XPath variables of the same name.

        Raises:
            IndexError: if the expression matches nothing.
        """
        namespaces = self._xpath_namespaces(
            context if ns_elem is None else ns_elem
        )
        return context.xpath(expression, namespaces=namespaces, **variables)[0]

    def _read_ovf_envelope(self, elem):
        """Dispatch the children of the envelope element."""
        handlers = {
            prefix_ns("ovf", "References"): noop_handler,
            prefix_ns("ovf", "DiskSection"): noop_handler,
            prefix_ns("ovf", "NetworkSection"): noop_handler,
            prefix_ns("ovf", "StartupSection"): noop_handler,
            prefix_ns("ovf", "VirtualSystem"): self._read_ovf_virtual_system,
        }
        for child in elem:
            handle_elem(child, handlers)

    def _read_ovf_virtual_system(self, elem):
        """Read the VM id and name, then descend into the hardware section."""
        def set_name(name_elem):
            self._vm.name = name_elem.text

        self._vm.id = elem.attrib[prefix_ns("ovf", "id")]

        handlers = {
            prefix_ns("ovf", "Info"): noop_handler,
            prefix_ns("ovf", "Name"): set_name,
            prefix_ns("ovf", "OperatingSystemSection"): noop_handler,
            prefix_ns("ovf", "VirtualHardwareSection"): self._read_hardware,
        }
        for child in elem:
            handle_elem(child, handlers)

    def _read_hardware(self, elem):
        """Dispatch the children of the virtual hardware section.

        ``Item`` children are routed by their integer ``rasd:ResourceType``;
        Xen "other configuration" children are routed by their ``Name``
        attribute.
        """
        item_handlers = {
            ResourceType.CPU: self._read_hw_cpu,
            ResourceType.MEMORY: self._read_hw_memory,
            ResourceType.ETHERNET: noop_handler,  # Network will not be attached
            ResourceType.CD_DRIVE: noop_handler,
            ResourceType.DVD_DRIVE: noop_handler,
            ResourceType.STORAGE_EXTENT: self._read_hw_disk,
        }
        other_config_handlers = {
            "HVM_boot_params": noop_handler,
            "HVM_boot_policy": noop_handler,
            "platform": self._read_hw_platform,
            "hardware_platform_version": noop_handler,  # Not relevant for oVirt
        }

        def resource_type(item):
            return int(self._xpath_first(item, "rasd:ResourceType/text()"))

        def handle_item(item):
            handle_elem(item, item_handlers, resource_type)

        def handle_other_config(config):
            handle_elem(config, other_config_handlers,
                        lambda e: e.attrib["Name"])

        section_handlers = {
            prefix_ns("ovf", "Info"): noop_handler,
            prefix_ns("ovf", "System"): noop_handler,
            prefix_ns("ovf", "Item"): handle_item,
            prefix_ns("xenovf", "VirtualSystemOtherConfigurationData"):
                handle_other_config,
        }
        for child in elem:
            handle_elem(child, section_handlers)

    def _read_hw_cpu(self, elem):
        """Store the CPU count from a CPU item; only one item is allowed."""
        if self._vm.cpu_count is not None:
            raise RuntimeError("OVF contains multiple CPU elements.")

        self._vm.cpu_count = int(
            self._xpath_first(elem, "rasd:VirtualQuantity/text()")
        )

    def _read_hw_memory(self, elem):
        """Store the memory size in bytes from a memory item.

        Raises:
            RuntimeError: if a memory item was already read, or the item's
                allocation units are not ``byte * 2^20`` (MB).
        """
        if self._vm.memory_bytes is not None:
            raise RuntimeError("OVF contains multiple memory elements.")

        # Check if allocation units are MB
        units = self._xpath_first(elem, "rasd:AllocationUnits/text()")
        if units != 'byte * 2^20':
            raise RuntimeError("Memory units are not MB")

        mem_mb = int(self._xpath_first(elem, "rasd:VirtualQuantity/text()"))
        self._vm.memory_bytes = mem_mb * 1024 * 1024

    def _read_hw_disk(self, elem):
        """Resolve a storage item to its disk and file entries and record it.

        The item's ``rasd:InstanceID`` selects the ``ovf:Disk`` with the same
        ``ovf:diskId``; that disk's ``ovf:fileRef`` selects the ``ovf:File``
        holding the image path.

        Raises:
            IndexError: if the item, disk or file lookup matches nothing.
            KeyError: if an expected attribute is absent.
        """
        disk_id = self._xpath_first(elem, "rasd:InstanceID/text()")

        # Ids come from the document, so they are passed as XPath variables
        # rather than pasted into the expression: a quote inside an id can
        # then neither break the query nor change its meaning.
        disk_elem = self._xpath_first(
            self._ovf,
            "/ovf:Envelope/ovf:DiskSection/ovf:Disk[@ovf:diskId=$disk_id]",
            ns_elem=elem,
            disk_id=str(disk_id),
        )

        file_id = disk_elem.attrib[prefix_ns("ovf", "fileRef")]
        file_elem = self._xpath_first(
            self._ovf,
            "/ovf:Envelope/ovf:References/ovf:File[@ovf:id=$file_id]",
            ns_elem=elem,
            file_id=str(file_id),
        )

        self._vm.disks.append({
            'id': disk_id,
            'name': str(self._xpath_first(elem, "rasd:ElementName/text()")),
            'capacity': int(disk_elem.attrib[prefix_ns("ovf", "capacity")]),
            'bootable': disk_elem.attrib[prefix_ns("xenovf", "isBootable")] in ["true", "True"],
            'file': file_elem.attrib[prefix_ns("ovf", "href")],
        })

    def _read_hw_platform(self, elem):
        """Pick ``cores-per-socket`` out of the Xen platform settings string.

        The value is a ``;``-separated list of ``key=value`` settings. Only
        ``cores-per-socket`` is used; every other setting is ignored.
        """
        info_str = self._xpath_first(elem, "xenovf:Value/text()")

        for setting in info_str.split(';'):
            # partition() tolerates empty fragments and fragments without
            # '=', which a two-way unpack of split('=') would crash on.
            key, _, value = setting.partition('=')
            if key == 'cores-per-socket':
                self._vm.cores_pre_socket = int(value)

    def _check_required_fields(self):
        """Raise RuntimeError for the first mandatory VM field left unset."""
        if self._vm.id is None:
            raise RuntimeError("VM ID is missing!")

        if self._vm.name is None:
            raise RuntimeError("Name is missing!")

        if self._vm.cpu_count is None:
            raise RuntimeError("CPU count information is missing!")

        if self._vm.memory_bytes is None:
            raise RuntimeError("Memory information is missing!")
def convert_disks(vm, skip_conversion):
    """Convert every disk of *vm* to qcow2 and record the output file name.

    For each entry in ``vm.disks`` the output name is the disk id plus
    ``.qcow2``; it is stored in the entry under ``"qcow_file"``.

    Args:
        vm: Object whose ``disks`` attribute is a list of dicts with at
            least ``"file"`` and ``"id"`` keys.
        skip_conversion: When true, ``qemu-img`` is not invoked and the
            output file is only assumed to exist.

    Raises:
        RuntimeError: if ``qemu-img`` exits with a non-zero status.
    """
    for entry in vm.disks:
        source = entry["file"]
        target = entry["id"] + ".qcow2"

        if not skip_conversion:
            logging.info("Converting disk: %s", source)
            command = [
                "qemu-img", "convert",
                "-f", "vpc",
                "-O", "qcow2",
                source, target,
            ]
            if subprocess.call(command) != 0:
                raise RuntimeError("Disk conversion failed")
            logging.info("Conversion succeeded. Output: %s", target)
        else:
            logging.info("Skipping conversion of disk: %s", source)
            logging.debug("Output assumed to be: %s", target)

        entry["qcow_file"] = target
def read_ovf(ovf_file):
    """Return the text of *ovf_file* without its leading XML declaration.

    A declaration of the form ``<?xml version="..." encoding="..."?>`` at
    the start of the file is removed whatever encoding it names; a file
    without such a declaration is returned unchanged. The declaration is
    presumably dropped because the caller parses the returned ``str``, and
    lxml refuses ``str`` input that carries an encoding declaration.

    Args:
        ovf_file: Path of the OVF file to read.

    Returns:
        The file contents as ``str``.
    """
    with open(ovf_file, "r") as f:
        contents = f.read()

    declaration_pattern = r'^\s*<\?xml\s*version="[^"]*"\s*encoding="([^"]*)"\s*\?>'

    match = re.match(declaration_pattern, contents)
    if match is None:
        return contents

    # Check the encoding named in the header. Compared case-insensitively
    # so that "UTF-16" is reported as well.
    if match.group(1).lower() == 'utf-16':
        # The OVF is probably not stored in UTF-16 format.
        logging.warning('XML contains encoding="utf-16", ignoring')

    # Strip the declaration; the pattern is anchored at the start of the
    # text, so slicing at the end of the match removes exactly that.
    return contents[match.end():]
def _is_within_directory(directory, target):
    """Return True if *target* is *directory* itself or lies beneath it."""
    abs_directory = os.path.abspath(directory)
    abs_target = os.path.abspath(target)
    try:
        # commonpath() compares whole path components. commonprefix() works
        # character by character and would accept "/x/foobar" as being
        # inside "/x/foo".
        common = os.path.commonpath([abs_directory, abs_target])
    except ValueError:
        # No common path at all (e.g. different drives on Windows).
        return False
    return common == abs_directory


def _safe_extract(tar, path="."):
    """Extract *tar* into *path*, refusing members that would land outside it.

    Raises:
        Exception: if a member name resolves to a location outside *path*.
    """
    for member in tar.getmembers():
        member_path = os.path.join(path, member.name)
        if not _is_within_directory(path, member_path):
            raise Exception("Attempted Path Traversal in Tar File")

    if hasattr(tarfile, "data_filter"):
        # Extraction filters also guard against link and special-file
        # tricks that a name check cannot see; use them where available.
        tar.extractall(path, filter="data")
    else:
        tar.extractall(path)


def main():
    """Command-line entry point.

    Accepts either an OVA archive or a directory. An archive is extracted
    next to itself first. The first ``*.ovf`` file in the resulting
    directory is read, its disks are converted (unless skipped), and the VM
    description is written to ``vm.json`` in that directory. The process
    working directory is changed to that directory.

    Returns:
        1 if the directory contains no OVF file, otherwise None.

    Raises:
        RuntimeError: if a file is given that does not end in ``.ova``, or
            reading the OVF / converting a disk fails.
    """
    parser = argparse.ArgumentParser(
        description="Extracts the OVA file, reads information from the OVF and converts disks from vhd format to qcow2."
    )
    parser.add_argument("-v", "--verbose", help="Show debug messages", action="store_true")
    parser.add_argument("-s", "--skip-disk-conversion",
                        help="Do not call qemu-img to convert disks",
                        action="store_true")
    parser.add_argument("filename", help="Xen OVA file or a directory containing the OVF file")

    args = parser.parse_args()

    logging.getLogger().setLevel(
        logging.DEBUG if args.verbose else logging.INFO
    )

    path = args.filename
    if os.path.isfile(path):
        if os.path.splitext(path)[1].lower() != '.ova':
            raise RuntimeError("File is not an OVA")

        # dirname() is "" for a bare file name; fall back to the current
        # directory so that extraction and chdir() below have a valid target.
        ova_dir = os.path.dirname(path) or os.curdir

        with tarfile.open(path) as tar_file:
            logging.info("Extracting OVA archive...")
            _safe_extract(tar_file, path=ova_dir)
            logging.info("Extraction finished.")

        path = ova_dir

    # Disk paths in the OVF and all outputs are relative to this directory.
    os.chdir(path)

    ovf_files = glob.glob('*.ovf')
    if not ovf_files:
        logging.error("Directory %s does not contain an OVF file.", path)
        return 1

    ovf_contents = read_ovf(ovf_files[0])
    ovf_root = et.fromstring(ovf_contents)

    vm = OvfReader().read_xen_ovf(ovf_root)
    convert_disks(vm, args.skip_disk_conversion)

    with open("vm.json", "w") as f:
        json.dump(vm.to_dict(), f, indent=4)
if __name__ == '__main__':
    import sys

    # main() returns 1 when no OVF file is found; hand that to the shell
    # instead of discarding it. A None return exits with status 0.
    sys.exit(main())