support downloading and auto-merging for split files
Signed-off-by: Praneeth <[email protected]>
bedapudi6788 committed Dec 1, 2020
1 parent 17e5107 commit a69829e
Showing 2 changed files with 49 additions and 28 deletions.
75 changes: 48 additions & 27 deletions pydload/download.py
@@ -6,12 +6,13 @@

 mb = 1024 * 1024
 
-def dload(url, save_to_path=None, timeout=10, max_time=30, verbose=True):
+def dload(url=None, urls=None, save_to_path=None, timeout=10, max_time=30, verbose=True):
     '''
         Parameters:
         url (str): URL of the file to be downloaded.
+        urls (list): Ordered list of URLs to be downloaded as a single file.
         save_to_path (str): Save as. If not provided, will be saved in the working directory with file_name auto identified from url.
@@ -29,13 +30,30 @@ def dload(url, save_to_path=None, timeout=10, max_time=30, verbose=True):
         False if downloading failed or stopped based on max_time. file_path if download is successful.
     '''
-    url = url.rstrip('/')
-    if 'http://' not in url[:7] and 'https://' not in url[:8]:
-        if verbose:
-            print('Assuming http://')
-        url = 'http://' + url
+    if url and urls and url != urls:
+        print("Only one of url or urls should be supplied")
+        return
+
+    if url and not urls:
+        urls = [url]
+        if isinstance(url, list):
+            urls = [i for i in url]
+
+    if not isinstance(urls, list):
+        print("urls should be a list")
+        return
+
+    for i, url in enumerate(urls):
+        url = url.rstrip('/')
+        if 'http://' not in url[:7] and 'https://' not in url[:8]:
+            if verbose:
+                print('Assuming http://')
+            url = 'http://' + url
+
+        urls[i] = url
 
     if not save_to_path:
+        url = urls[0]
         save_to_path = url.split('/')[-1].split('?')[0]
         if not save_to_path.strip():
             save_to_path = url.split('/')[-2]
@@ -52,25 +70,28 @@ def dload(url, save_to_path=None, timeout=10, max_time=30, verbose=True):
     if verbose:
         print("The download will be auto-terminated in", max_time, "if not completed.")
 
-    try:
-        request = requests.get(url, timeout=timeout, stream=True, verify=True, allow_redirects=True)
-    except:
-        if verbose:
-            print('SSL certificate not verified...')
-        request = requests.get(url, timeout=timeout, stream=True, verify=False, allow_redirects=True)
-    f = open(save_to_path, 'wb')
-    start_time = time.time()
-
-    file_size = None
-    try:
-        file_size = (float(request.headers['Content-length'])// mb) + 1
-    except:
-        if verbose:
-            print('Content-length not found, file size cannot be estimated.')
-        pass
+    for url in urls:
+        try:
+            request = requests.get(url, timeout=timeout, stream=True, verify=True, allow_redirects=True)
+        except:
+            if verbose:
+                print('SSL certificate not verified...')
+            request = requests.get(url, timeout=timeout, stream=True, verify=False, allow_redirects=True)
+
+        file_size = None
+        try:
+            file_size = (float(request.headers['Content-length'])// mb) + 1
+        except:
+            if verbose:
+                print('Content-length not found, file size cannot be estimated.')
+            pass
+
+        is_stopped = False
 
-    is_stopped = False
-
+        with open(save_to_path, 'wb') as f:
+            start_time = time.time()
             if verbose:
                 for chunk in progressbar.progressbar(request.iter_content(mb), max_value=file_size, prefix='MB'):
                     f.write(chunk)
@@ -86,11 +107,11 @@
                 if time.time() - start_time >= max_time:
                     is_stopped = True
                     break
 
-    if is_stopped:
-        if verbose:
-            print('Stopped due to excess time')
-        return False
+        if is_stopped:
+            if verbose:
+                print('Stopped due to excess time')
+            return False
 
     else:
         if verbose:
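For reference, a minimal usage sketch of the split-file support this commit adds, calling dload the way the package README does. The part URLs and file name below are hypothetical placeholders:

import pydload

# Existing behaviour: download a single file.
pydload.dload(url='http://example.com/model.bin')

# New in this commit: pass an ordered list of part URLs; the parts are
# fetched in order and merged into one file at save_to_path.
pydload.dload(
    urls=[
        'http://example.com/model.bin.part1',
        'http://example.com/model.bin.part2',
    ],
    save_to_path='model.bin',
)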
2 changes: 1 addition & 1 deletion setup.py
@@ -18,7 +18,7 @@
 EMAIL = '[email protected]'
 AUTHOR = 'BEDAPUDI PRANEETH'
 REQUIRES_PYTHON = '>=3.5.0'
-VERSION = '1.0.8'
+VERSION = '1.0.9'
 
 # What packages are required for this module to be executed?
 REQUIRED = [
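Assuming the bumped version is published to PyPI as with previous releases, users can pick up the split-file support with:

pip install --upgrade pydload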