
Commit

Added AppEEARS order download script to download all finished orders
rbavery committed Oct 29, 2019
1 parent ede0685 commit 634590d
Showing 8 changed files with 415 additions and 57 deletions.
358 changes: 301 additions & 57 deletions notebooks/ECOSTRESS_AppEEARS_API.ipynb

Large diffs are not rendered by default.

85 changes: 85 additions & 0 deletions scripts/appeears_download.py
@@ -0,0 +1,85 @@
import requests
import getpass
import os
import cgi

inDir = "/scratch/rave/" # Set the download directory and make it the current working directory
os.chdir(inDir)

# Enter Earthdata login credentials
username = getpass.getpass('Earthdata Username:')
password = getpass.getpass('Earthdata Password:')

API = 'https://lpdaacsvc.cr.usgs.gov/appeears/api' # Set the AppEEARS API base URL to a variable (no trailing slash; the paths below add their own)

# Insert API URL, call login service, provide credentials & return json
login_response = requests.post(f"{API}/login", auth=(username, password)).json()
del username, password
print(login_response)

# Assign the token to a variable
token = login_response['token']
head = {'Authorization': f"Bearer {token}"}
print(head)

response = requests.get(f"{API}/task", headers=head) # List all tasks submitted by this account
task_response = response.json()
print(task_response)

def get_done_task_ids(task_response):
    done_ids = []
    for task in task_response:
        if task.get('error') is not None: # Pending tasks may lack the 'error' key entirely
            print(f"Task {task['task_id']} had an error \n {task}")
        if task['status'] == 'done':
            done_ids.append(task['task_id'])
        else:
            print(f"{task['task_id']} is in status {task['status']}")
    return done_ids

def get_bundle_size_gb(bundle):
    filesizes_gb = [i['file_size']/1e9 for i in bundle['files']]
    return sum(filesizes_gb)

def get_bundles_and_sizes(task_ids):
    bundles = []
    for task_id in task_ids:
        bundle = requests.get(f"{API}/bundle/{task_id}").json() # Call the API and return bundle contents for the task_id as json
        print(f"Size of bundle for {task_id} is {get_bundle_size_gb(bundle)} GB")
        bundles.append(bundle)
    return bundles

def download_bundle(bundle, root_dir):
    files = {}
    for f in bundle['files']:
        files[f['file_id']] = f['file_name'] # Fill dictionary with file_id as keys and file_name as values
    # Set up output directory on local machine
    outDir = os.path.join(root_dir, f"taskid-{bundle['task_id']}")
    if not os.path.exists(outDir):
        os.makedirs(outDir)
        print(f"made directory at {outDir}")
    print(f"Downloading files for {bundle['task_id']}")
    for file_id in files:
        download_response = requests.get(f"{API}/bundle/{bundle['task_id']}/{file_id}", stream=True) # Get a stream to the bundle file
        filename = os.path.basename(cgi.parse_header(download_response.headers['Content-Disposition'])[1]['filename']) # Parse the name from the Content-Disposition header
        filepath = os.path.join(outDir, filename) # Create the output file path
        with open(filepath, 'wb') as out_file: # Write the file to the destination directory
            for data in download_response.iter_content(chunk_size=8192):
                out_file.write(data)
    print(f"Downloading {bundle['task_id']} complete!")

def download_bundles(bundles, root_dir):
    for bundle in bundles:
        download_bundle(bundle, root_dir)


done_ids = get_done_task_ids(task_response)
bundles = get_bundles_and_sizes(done_ids)
download_bundles(bundles, inDir)
print("Done downloading all completed bundles")
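The script is meant to be run directly; a minimal invocation sketch, assuming the Earthdata account already has finished AppEEARS tasks:

# Prompts for Earthdata credentials, then downloads every finished bundle into inDir
python scripts/appeears_download.py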

1 change: 1 addition & 0 deletions scripts/jupyter.sh
@@ -0,0 +1 @@
jupyter lab --no-browser -y /home/ryan/work
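Since the server starts with --no-browser, presumably on a remote VM, a hypothetical SSH tunnel to reach it from a local browser (the address and port 8888 are assumptions):

# Forward JupyterLab's default port from the VM to localhost
ssh -N -L 8888:localhost:8888 ryan@<vm-address>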
8 changes: 8 additions & 0 deletions scripts/mount-blobfuse.sh
@@ -0,0 +1,8 @@
#!/bin/bash
# run this after starting the VM
sudo mkdir -p /az-ml-container
sudo mkdir -p /mnt/blobfusetmp
sudo chown -R ryan /mnt/blobfusetmp/
sudo chown -R ryan /az-ml-container/
blobfuse /az-ml-container --tmp-path=/mnt/blobfusetmp -o big_writes -o max_read=131072 -o max_write=131072 -o attr_timeout=240 -o fsname=blobfuse -o entry_timeout=240 -o negative_timeout=120 --config-file=/home/ryan/work/blobfuse.cfg
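The mount command reads credentials from /home/ryan/work/blobfuse.cfg; a sketch of that file's expected key/value layout, with placeholder values:

cat > /home/ryan/work/blobfuse.cfg <<'EOF'
accountName <storage-account-name>
accountKey <storage-account-key>
containerName <blob-container-name>
EOF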

6 changes: 6 additions & 0 deletions scripts/mount-fileshare.sh
@@ -0,0 +1,6 @@
# one-off setup: create the mount point and mount the share
sudo mkdir -p /permmnt/$2
sudo chown -R ryan /permmnt/$2
sudo mount -t cifs //$1.file.core.windows.net/$2 /permmnt/$2 -o vers=3.0,username=$1,password=$3,dir_mode=0777,file_mode=0777,serverino

# $1 is the storage account name, $2 is the share name, and $3 is the account key
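A hypothetical invocation matching the positional arguments documented above, with placeholder values:

./mount-fileshare.sh <storage-account-name> <share-name> '<account-key>'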
6 changes: 6 additions & 0 deletions scripts/push_container.sh
@@ -0,0 +1,6 @@
# run if there is no workspace yet; make sure to publish the container to the registry
az ml workspace show -w CropMask_RCNN_Workspace -g cropmaskresource_grouphz --query containerRegistry
az acr login --name cropmaskcontainers
az acr update -n cropmaskcontainers --admin-enabled true
docker tag image_tag_on_local cropmaskcontainers.azurecr.io/remote_tag
docker push cropmaskcontainers.azurecr.io/remote_tag
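With the admin account enabled, the registry credentials for a docker login from another machine can be retrieved; a sketch reusing the registry name above:

az acr credential show --name cropmaskcontainers
docker login cropmaskcontainers.azurecr.io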
6 changes: 6 additions & 0 deletions scripts/setup_dev_env.sh
@@ -0,0 +1,6 @@
source ~/.bashrc
/data/anaconda/bin/conda update -n base -c defaults conda -y
/data/anaconda/envs/py36/bin/conda env update -f ~/work/CropMask_RCNN/environment.yml -y
cd ~/work/CropMask_RCNN/
/data/anaconda/envs/py36/bin/python setup.py develop
conda install -c conda-forge jupyterlab=1.0.2 # updating to jupyterlab 1.x
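After the environment update, the py36 env (name taken from the paths above) would presumably be activated before launching JupyterLab:

source /data/anaconda/bin/activate py36
jupyter lab --no-browser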
2 changes: 2 additions & 0 deletions scripts/shp_to_geojson.sh
@@ -0,0 +1,2 @@
# run this in the folder that contains the shapefiles; make sure there is a folder called geojson-aois next to it
for f in *.shp; do ogr2ogr -f geojson ../geojson-aois/$f-converted.geojson $f; done
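Note the loop keeps the .shp extension in the output name (name.shp-converted.geojson). A variant that strips it, assuming cleaner names are wanted:

for f in *.shp; do ogr2ogr -f GeoJSON "../geojson-aois/${f%.shp}.geojson" "$f"; done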
