support downloading and auto-merging for split files
Signed-off-by: Praneeth <[email protected]>
bedapudi6788 committed Dec 1, 2020
1 parent 17e5107 commit a69829e
Showing 2 changed files with 49 additions and 28 deletions.
75 changes: 48 additions & 27 deletions pydload/download.py
@@ -6,12 +6,13 @@

 mb = 1024 * 1024
 
-def dload(url, save_to_path=None, timeout=10, max_time=30, verbose=True):
+def dload(url=None, urls=None, save_to_path=None, timeout=10, max_time=30, verbose=True):
     '''
         Parameters:
         url (str): URL of the file to be downloaded.
+        urls (list): Ordered list of URLs to be downloaded as a single file.
         save_to_path (str): Save as. If not provided, will be saved in the working directory with file_name auto identified from url.
@@ -29,13 +30,30 @@ def dload(url, save_to_path=None, timeout=10, max_time=30, verbose=True):
         False if downloading failed or stopped based on max_time. file_path if download is successful.
     '''
-    url = url.rstrip('/')
-    if 'http://' not in url[:7] and 'https://' not in url[:8]:
-        if verbose:
-            print('Assuming http://')
-        url = 'http://' + url
+    if url and urls and url != urls:
+        print("Only one of url or urls should be supplied")
+        return
+
+    if url and not urls:
+        urls = [url]
+        if isinstance(url, list):
+            urls = [i for i in url]
+
+    if not isinstance(urls, list):
+        print("urls should be a list")
+        return
+
+    for i, url in enumerate(urls):
+        url = url.rstrip('/')
+        if 'http://' not in url[:7] and 'https://' not in url[:8]:
+            if verbose:
+                print('Assuming http://')
+            url = 'http://' + url
+
+        urls[i] = url
 
     if not save_to_path:
+        url = urls[0]
         save_to_path = url.split('/')[-1].split('?')[0]
         if not save_to_path.strip():
             save_to_path = url.split('/')[-2]
@@ -52,25 +70,28 @@ def dload(url, save_to_path=None, timeout=10, max_time=30, verbose=True):
     if verbose:
         print("The download will be auto-terminated in", max_time, "if not completed.")
 
-    try:
-        request = requests.get(url, timeout=timeout, stream=True, verify=True, allow_redirects=True)
-    except:
-        if verbose:
-            print('SSL certificate not verified...')
-        request = requests.get(url, timeout=timeout, stream=True, verify=False, allow_redirects=True)
-    f = open(save_to_path, 'wb')
-    start_time = time.time()
-
-    file_size = None
-    try:
-        file_size = (float(request.headers['Content-length'])// mb) + 1
-    except:
-        if verbose:
-            print('Content-length not found, file size cannot be estimated.')
-        pass
+    for url in urls:
+        try:
+            request = requests.get(url, timeout=timeout, stream=True, verify=True, allow_redirects=True)
+        except:
+            if verbose:
+                print('SSL certificate not verified...')
+            request = requests.get(url, timeout=timeout, stream=True, verify=False, allow_redirects=True)
+
+        file_size = None
+        try:
+            file_size = (float(request.headers['Content-length'])// mb) + 1
+        except:
+            if verbose:
+                print('Content-length not found, file size cannot be estimated.')
+            pass
+
+        is_stopped = False
 
-    is_stopped = False
-
+        with open(save_to_path, 'wb') as f:
+            start_time = time.time()
             if verbose:
                 for chunk in progressbar.progressbar(request.iter_content(mb), max_value=file_size, prefix='MB'):
                     f.write(chunk)
@@ -86,11 +107,11 @@
                 if time.time() - start_time >= max_time:
                     is_stopped = True
                     break
 
-    if is_stopped:
-        if verbose:
-            print('Stopped due to excess time')
-        return False
+        if is_stopped:
+            if verbose:
+                print('Stopped due to excess time')
+            return False
 
     else:
         if verbose:
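For reference, a minimal usage sketch of the split-file support this commit adds, calling dload the way the package README does. The part URLs and file name below are hypothetical placeholders:

import pydload

# Existing behaviour: download a single file.
pydload.dload(url='http://example.com/model.bin')

# New in this commit: pass an ordered list of part URLs; the parts are
# fetched in order and merged into one file at save_to_path.
pydload.dload(
    urls=[
        'http://example.com/model.bin.part1',
        'http://example.com/model.bin.part2',
    ],
    save_to_path='model.bin',
)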
2 changes: 1 addition & 1 deletion setup.py
@@ -18,7 +18,7 @@
 EMAIL = '[email protected]'
 AUTHOR = 'BEDAPUDI PRANEETH'
 REQUIRES_PYTHON = '>=3.5.0'
-VERSION = '1.0.8'
+VERSION = '1.0.9'
 
 # What packages are required for this module to be executed?
 REQUIRED = [
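Assuming the bumped version is published to PyPI as with previous releases, users can pick up the split-file support with:

pip install --upgrade pydload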