
Commit

Added AppEEARS order download script to download all finished orders
rbavery committed Oct 29, 2019
1 parent ede0685 commit 634590d
Showing 8 changed files with 415 additions and 57 deletions.
358 changes: 301 additions & 57 deletions notebooks/ECOSTRESS_AppEEARS_API.ipynb

Large diffs are not rendered by default.

85 changes: 85 additions & 0 deletions scripts/appeears_download.py
@@ -0,0 +1,85 @@
import requests
import getpass
import os
import cgi

inDir = "/scratch/rave/" # Set the download directory and make it the current working directory
os.chdir(inDir)

# Enter Earthdata login credentials
username = getpass.getpass('Earthdata Username:')
password = getpass.getpass('Earthdata Password:')

API = 'https://lpdaacsvc.cr.usgs.gov/appeears/api' # Set the AppEEARS API base URL to a variable (no trailing slash; the paths below add their own)

# Insert API URL, call login service, provide credentials & return json
login_response = requests.post(f"{API}/login", auth=(username, password)).json()
del username, password
print(login_response)

# Assign the token to a variable
token = login_response['token']
head = {'Authorization': f"Bearer {token}"}
print(head)

response = requests.get(f"{API}/task", headers=head) # List all tasks submitted by this account
task_response = response.json()
print(task_response)

def get_done_task_ids(task_response):
    done_ids = []
    for task in task_response:
        if task.get('error') is not None: # Pending tasks may lack the 'error' key entirely
            print(f"Task {task['task_id']} had an error \n {task}")
        if task['status'] == 'done':
            done_ids.append(task['task_id'])
        else:
            print(f"{task['task_id']} is in status {task['status']}")
    return done_ids

def get_bundle_size_gb(bundle):
    filesizes_gb = [i['file_size']/1e9 for i in bundle['files']]
    return sum(filesizes_gb)

def get_bundles_and_sizes(task_ids):
    bundles = []
    for task_id in task_ids:
        bundle = requests.get(f"{API}/bundle/{task_id}").json() # Call the API and return bundle contents for the task_id as json
        print(f"Size of bundle for {task_id} is {get_bundle_size_gb(bundle)} GB")
        bundles.append(bundle)
    return bundles

def download_bundle(bundle, root_dir):
    files = {}
    for f in bundle['files']:
        files[f['file_id']] = f['file_name'] # Fill dictionary with file_id as keys and file_name as values
    # Set up output directory on local machine
    outDir = os.path.join(root_dir, f"taskid-{bundle['task_id']}")
    if not os.path.exists(outDir):
        os.makedirs(outDir)
        print(f"made directory at {outDir}")
    print(f"Downloading files for {bundle['task_id']}")
    for file_id in files:
        download_response = requests.get(f"{API}/bundle/{bundle['task_id']}/{file_id}", stream=True) # Get a stream to the bundle file
        filename = os.path.basename(cgi.parse_header(download_response.headers['Content-Disposition'])[1]['filename']) # Parse the name from the Content-Disposition header
        filepath = os.path.join(outDir, filename) # Create the output file path
        with open(filepath, 'wb') as out_file: # Write the file to the destination directory
            for data in download_response.iter_content(chunk_size=8192):
                out_file.write(data)
    print(f"Downloading {bundle['task_id']} complete!")

def download_bundles(bundles, root_dir):
    for bundle in bundles:
        download_bundle(bundle, root_dir)


done_ids = get_done_task_ids(task_response)
bundles = get_bundles_and_sizes(done_ids)
download_bundles(bundles, inDir)
print("Done downloading all completed bundles")
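The script is meant to be run directly; a minimal invocation sketch, assuming the Earthdata account already has finished AppEEARS tasks:

# Prompts for Earthdata credentials, then downloads every finished bundle into inDir
python scripts/appeears_download.py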

1 change: 1 addition & 0 deletions scripts/jupyter.sh
@@ -0,0 +1 @@
jupyter lab --no-browser -y /home/ryan/work
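Since the server starts with --no-browser, presumably on a remote VM, a hypothetical SSH tunnel to reach it from a local browser (the address and port 8888 are assumptions):

# Forward JupyterLab's default port from the VM to localhost
ssh -N -L 8888:localhost:8888 ryan@<vm-address>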
8 changes: 8 additions & 0 deletions scripts/mount-blobfuse.sh
@@ -0,0 +1,8 @@
#!/bin/bash
# run this after starting the VM
sudo mkdir -p /az-ml-container
sudo mkdir -p /mnt/blobfusetmp
sudo chown -R ryan /mnt/blobfusetmp/
sudo chown -R ryan /az-ml-container/
blobfuse /az-ml-container --tmp-path=/mnt/blobfusetmp -o big_writes -o max_read=131072 -o max_write=131072 -o attr_timeout=240 -o fsname=blobfuse -o entry_timeout=240 -o negative_timeout=120 --config-file=/home/ryan/work/blobfuse.cfg
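The mount command reads credentials from /home/ryan/work/blobfuse.cfg; a sketch of that file's expected key/value layout, with placeholder values:

cat > /home/ryan/work/blobfuse.cfg <<'EOF'
accountName <storage-account-name>
accountKey <storage-account-key>
containerName <blob-container-name>
EOF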

6 changes: 6 additions & 0 deletions scripts/mount-fileshare.sh
@@ -0,0 +1,6 @@
# one-off setup: create the mount point and mount the share
sudo mkdir -p /permmnt/$2
sudo chown -R ryan /permmnt/$2
sudo mount -t cifs //$1.file.core.windows.net/$2 /permmnt/$2 -o vers=3.0,username=$1,password=$3,dir_mode=0777,file_mode=0777,serverino

# $1 is the storage account name, $2 is the share name, and $3 is the account key
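A hypothetical invocation matching the positional arguments documented above, with placeholder values:

./mount-fileshare.sh <storage-account-name> <share-name> '<account-key>'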
6 changes: 6 additions & 0 deletions scripts/push_container.sh
@@ -0,0 +1,6 @@
# run if there is no workspace yet; make sure to publish the container to the registry
az ml workspace show -w CropMask_RCNN_Workspace -g cropmaskresource_grouphz --query containerRegistry
az acr login --name cropmaskcontainers
az acr update -n cropmaskcontainers --admin-enabled true
docker tag image_tag_on_local cropmaskcontainers.azurecr.io/remote_tag
docker push cropmaskcontainers.azurecr.io/remote_tag
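With the admin account enabled, the registry credentials for a docker login from another machine can be retrieved; a sketch reusing the registry name above:

az acr credential show --name cropmaskcontainers
docker login cropmaskcontainers.azurecr.io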
6 changes: 6 additions & 0 deletions scripts/setup_dev_env.sh
@@ -0,0 +1,6 @@
source ~/.bashrc
/data/anaconda/bin/conda update -n base -c defaults conda -y
/data/anaconda/envs/py36/bin/conda env update -f ~/work/CropMask_RCNN/environment.yml -y
cd ~/work/CropMask_RCNN/
/data/anaconda/envs/py36/bin/python setup.py develop
conda install -c conda-forge jupyterlab=1.0.2 # updating to jupyterlab 1.x
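After the environment update, the py36 env (name taken from the paths above) would presumably be activated before launching JupyterLab:

source /data/anaconda/bin/activate py36
jupyter lab --no-browser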
2 changes: 2 additions & 0 deletions scripts/shp_to_geojson.sh
@@ -0,0 +1,2 @@
# run this in the folder that contains the shapefiles; make sure there is a folder called geojson-aois next to it
for f in *.shp; do ogr2ogr -f geojson ../geojson-aois/$f-converted.geojson $f; done
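Note the loop keeps the .shp extension in the output name (name.shp-converted.geojson). A variant that strips it, assuming cleaner names are wanted:

for f in *.shp; do ogr2ogr -f GeoJSON "../geojson-aois/${f%.shp}.geojson" "$f"; done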
