Skip to content

Commit

Permalink
New Library: ENCODETools
Browse files Browse the repository at this point in the history
broke out a number of functions into ENCODETools.py.
  • Loading branch information
Drew Erickson committed Oct 15, 2013
1 parent 37a9126 commit 23e60ec
Show file tree
Hide file tree
Showing 3 changed files with 347 additions and 52 deletions.
135 changes: 135 additions & 0 deletions ENCODETools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
import os
import sys
import csv
import json
import jsonschema
import requests
from pyelasticsearch import ElasticSearch
import xlrd
import xlwt
from base64 import b64encode

# set headers. UNCLEAR IF THIS IS USED PROPERLY
HEADERS = {'content-type': 'application/json'}

# get object from server
def get_ENCODE(obj_id,keys):
    '''Fetch one ENCODE object over HTTP and return its decoded JSON body.

    obj_id is appended directly to keys['server'], followed by the query
    string '?limit=all'.  keys must provide 'server', 'authid' and 'authpw';
    the last two are handed to requests.get as the auth pair.

    When the reply status is not 200 the reply text is echoed to stderr.
    The decoded body is returned whatever the status was.
    '''
    target = keys['server'] + obj_id + '?limit=all'
    credentials = (keys['authid'], keys['authpw'])
    reply = requests.get(target, auth=credentials, headers=HEADERS)
    if reply.status_code != 200:
        # surface the server's complaint, but still return the parsed body
        print >> sys.stderr, reply.text
    return reply.json()

# get object from server
def GetENCODE(object_id,keys):
    '''GET an ENCODE object and return its decoded JSON body.

    object_id -- path appended to keys['server'] (followed by '?limit=all')
    keys      -- dict providing 'server', 'authid' and 'authpw'

    Returns None when object_id is not a str, or when the request raises;
    in the latter case the error is printed so the failure is not silent.
    '''
    # only plain string ids are looked up; anything else yields None
    if not isinstance(object_id, str):
        return None

    url = keys['server'] + object_id + '?limit=all'
    print(url)
    try:
        response = requests.get(url, auth=(keys['authid'], keys['authpw']), headers=HEADERS)
    except Exception as e:
        # best-effort lookup: report what went wrong and give up on this id
        print("Get request failed:")
        print(e)
        return None
    return response.json()


# patch object to server
def patch_ENCODE(obj_id, patch_json, keys=None):
    '''PATCH an existing ENCODE object and return the response JSON.

    obj_id     -- path of the object, appended to keys['server']
    patch_json -- JSON-serialisable value sent as the request body
    keys       -- dict providing 'server', 'authid' and 'authpw' (the shape
                  KeyENCODE returns).  A 'pw' entry is accepted when
                  'authpw' is absent.  When omitted, a module-level ``keys``
                  is used if one has been defined.

    Prints the status code; on a non-200 reply the reply text is echoed to
    stderr.  The decoded body is returned whatever the status was.
    Raises ValueError when no credentials are available.
    '''
    if keys is None:
        # two-argument calls previously depended on a global named ``keys``
        keys = globals().get('keys')
    if keys is None:
        raise ValueError('patch_ENCODE needs a keys dict (see KeyENCODE)')

    # KeyENCODE stores the password under 'authpw'; tolerate the older 'pw'
    password = keys['authpw'] if 'authpw' in keys else keys['pw']

    url = keys['server'] + obj_id
    json_payload = json.dumps(patch_json)
    # send the JSON content-type header, as the GET and POST helpers do
    response = requests.patch(url, auth=(keys['authid'], password), headers=HEADERS, data=json_payload)
    print("Patch:")
    print(response.status_code)
    if response.status_code != 200:
        print >> sys.stderr, response.text
    return response.json()

# post object to server
def new_ENCODE(collection_id, object_json, keys=None):
    '''POST an ENCODE object as JSON and return the response JSON.

    collection_id -- collection name; the URL is server + '/' + name + '/'
    object_json   -- JSON-serialisable value sent as the request body
    keys          -- optional dict providing 'server', 'authid' and
                     'authpw' (the shape KeyENCODE returns).  When omitted,
                     the module-level SERVER, AUTHID and AUTHPW names are
                     used, so they must exist in that case.

    On a reply status other than 201 the reply text is echoed to stderr.
    The decoded body is returned whatever the status was.
    '''
    if keys is None:
        # legacy path: credentials come from module-level globals
        server, auth = SERVER, (AUTHID, AUTHPW)
    else:
        server, auth = keys['server'], (keys['authid'], keys['authpw'])

    url = server + '/' + collection_id + '/'
    json_payload = json.dumps(object_json)
    response = requests.post(url, auth=auth, headers=HEADERS, data=json_payload)
    if response.status_code != 201:
        print >> sys.stderr, response.text
    return response.json()

# get keys from file
def KeyENCODE(key_file,server_name):
    '''Look up the credentials for server_name in a tab-delimited key file.

    The file is read with csv.DictReader, so its first row names the
    columns; the columns used are 'Server', 'User', 'ID' and 'PW'.

    Returns a dict with:
        'user'   -- the 'User' column
        'server' -- 'http://' + the 'Server' column + '.encodedcc.org'
        'authid' -- the 'ID' column
        'authpw' -- the 'PW' column
    If several rows match, the last one wins.

    Raises ValueError when no row matches server_name.
    '''
    key_info = None
    # the with-block closes the file even if parsing raises
    with open(key_file) as key_open:
        for row in csv.DictReader(key_open, delimiter='\t'):
            if row.get('Server') == server_name:
                key_info = {
                    'user': row.get('User'),
                    'server': 'http://' + row.get('Server') + '.encodedcc.org',
                    'authid': row.get('ID'),
                    'authpw': row.get('PW'),
                }
    if key_info is None:
        raise ValueError('no entry for server %r in %s' % (server_name, key_file))
    return key_info

# read json objects from file
def ReadJSON(json_file):
    '''Load the JSON document stored at path json_file and return it.

    The file is closed even when parsing fails; errors from open() and
    json.load() propagate to the caller.
    '''
    with open(json_file) as json_load:
        return json.load(json_load)

# write new json object. SHOULD BE MODIFIED TO CUSTOM OUTPUT FORMAT (FOR HUMAN VIEWING)
def WriteJSON(new_object,object_file):
    '''Serialise new_object as JSON into the file at path object_file.

    The file is opened in 'w' mode, so existing content is replaced.
    Nothing is returned.
    '''
    with open(object_file, 'w') as sink:
        # leaving the with-block closes the file
        json.dump(new_object, sink)

# check json object for validity. SHOULD ONLY NEED OBJECT. NEED DEF TO EXTRACT VALUE (LIKE TYPE) FROM JSON OBJECT GRACEFULLY.
def ValidJSON(object_type,object_id,new_object,keys=None):
    '''Validate new_object against the server's schema for object_type.

    object_type -- schema name; fetched from '/profiles/<object_type>.json'
    object_id   -- label used only in the printed messages
    new_object  -- the object to validate
    keys        -- credentials dict forwarded to get_ENCODE, which reads
                   'server', 'authid' and 'authpw' from it.  The default of
                   None only keeps the old three-argument signature
                   callable; the schema fetch needs a real dict.

    Prints the outcome and returns True when validation passes, False when
    jsonschema.validate raises (the error is printed).
    '''
    # get the relevant schema
    object_schema = get_ENCODE('/profiles/' + object_type + '.json', keys)

    # test the new object. SHOULD HANDLE ERRORS GRACEFULLY
    try:
        jsonschema.validate(new_object, object_schema)
    except Exception as e:
        # did not validate
        print('Validation of ' + object_id + ' failed.')
        print(e)
        return False

    # did validate: inform the user of the success
    print('Validation of ' + object_id + ' succeeded.')
    return True

# intended to fix invalid JSON. DOES NOT DO ANYTHING YET.
def CleanJSON(object_type,object_id,new_object,keys=None):
    '''Try to make new_object validate by dropping a single key.

    Each key is removed in turn and the remainder is checked with
    ValidJSON.  The first removal that validates is kept and True is
    returned.  A removal that does not help is undone before the next key
    is tried.  Returns False when no single removal validates (including
    for an empty object).

    new_object is modified in place.  keys, when given, is forwarded to
    ValidJSON as its fourth argument.
    '''
    # snapshot the items: the dict is mutated while we walk it
    for key, value in list(new_object.items()):
        new_object.pop(key)
        if keys is None:
            is_valid = ValidJSON(object_type, object_id, new_object)
        else:
            is_valid = ValidJSON(object_type, object_id, new_object, keys)
        if is_valid:
            return True
        # removing this key did not help; put it back
        new_object[key] = value
    return False

# flatten embedded json objects to their ID
def FlatJSON(json_object,keys):
    '''Replace every dict-valued field of json_object with that dict's '@id'.

    The object is first passed through EmbedJSON, then each value whose
    type is exactly dict is swapped for its u'@id' entry.  The object that
    EmbedJSON returned is modified in place and returned.
    '''
    expanded = EmbedJSON(json_object, keys)
    for field in list(expanded.keys()):
        entry = expanded[field]
        if type(entry) is dict:
            expanded[field] = entry[u'@id']
    return expanded

# expand json object
def EmbedJSON(json_object,keys):
    '''Expand path-like string values of json_object into fetched objects.

    Every value whose type is unicode and which starts with '/' is looked
    up with GetENCODE(str(value), keys).  When that lookup returns a dict,
    the dict replaces the original value.  Other values are left alone,
    and fetched objects are not expanded recursively.

    json_object is modified in place and returned.
    '''
    # iterate over a snapshot so assigning into the dict is always safe
    for key, value in list(json_object.items()):
        if type(value) is not unicode:
            continue
        # startswith() copes with an empty string, where value[0] would raise
        if not value.startswith(u'/'):
            continue
        # GetENCODE only accepts plain str ids, hence the conversion
        json_sub_object = GetENCODE(str(value), keys)
        if type(json_sub_object) is dict:
            json_object[key] = json_sub_object
    return json_object
108 changes: 56 additions & 52 deletions update.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
import json
import jsonschema
import requests
from pyelasticsearch import ElasticSearch
import xlrd
import xlwt
from base64 import b64encode

# set headers. UNCLEAR IF THIS IS USED PROPERLY
HEADERS = {'content-type': 'application/json'}
Expand Down Expand Up @@ -32,7 +36,7 @@ def patch_ENCODE(obj_id, patch_json):
# post object to server
def new_ENCODE(collection_id, object_json):
'''POST an ENCODE object as JSON and return the resppnse JSON'''
url = SERVER+collection_id
url = SERVER+'/'+collection_id+'/'
json_payload = json.dumps(object_json)
response = requests.post(url, auth=(AUTHID, AUTHPW), headers=HEADERS, data=json_payload)
if not response.status_code == 201:
Expand All @@ -51,9 +55,10 @@ def WriteJSON(new_object,object_file):
This script will read in all objects in the objects folder, determine if they are different from the database object, and post or patch them to the database.
Authentication is determined from the keys.txt file.
'''
# FUTURE: Should also deal with errors that are only dependency based.

# set server name. MODIFY TO HAVE USER CHOOSE SERVER (ENUM LIST FROM THE FILE)
server_name = 'submit-dev'
server_name = 'staging'

# get ID, PW. MODIFY TO USE USERNAME/PASS TO GAIN ACCESS TO CREDENTIALS
key_file = open('keys.txt')
Expand All @@ -69,75 +74,74 @@ def WriteJSON(new_object,object_file):

# let user know the server/user that is set for running script
print(USER + ' will be running this update on ' + SERVER)
#print(AUTHID,AUTHPW)

# load objects in object folder. MODIFY TO HAVE USER VIEW AND SELECT OBJECTS
object_filenames = os.listdir('objects/')

# run for each object in objects folder
for object_filename in object_filenames:
if '.json' in object_filename:

# define object parameters. SHOULD NOT RELY ON FILENAME. NEED WAY TO IDENTIFY OBJECT TYPE/NAME BY REVIEWING DATA
object_type,object_name = object_filename.strip('.json').split(';')
object_file = ('objects/' + object_type + ';' + object_name + '.json')
object_id = ('/' + object_type + 's/' + object_name + '/')
# define object parameters. SHOULD NOT RELY ON FILENAME. NEED WAY TO IDENTIFY OBJECT TYPE/NAME BY REVIEWING DATA
object_type,object_name = object_filename.strip('.json').split(';')
object_file = ('objects/' + object_type + ';' + object_name + '.json')
object_collection = (object_type.replace('_','-') + 's')
object_id = ('/' + object_collection + '/' + object_name + '/')

# load object
json_object = open(object_file)
new_object = json.load(json_object)
json_object.close()
# load object SHOULD HANDLE ERRORS GRACEFULLY
json_object = open(object_file)
new_object = json.load(json_object)
json_object.close()

# check to see if object already exists
# PROBLEM: SHOULD CHECK UUID AND NOT USE ANY SHORTCUT METADATA THAT MIGHT NEED TO CHANGE
# BUT CAN'T USE UUID IF NEW... HENCE PROBLEM
old_object = get_ENCODE(object_id)
# check to see if object already exists
# PROBLEM: SHOULD CHECK UUID AND NOT USE ANY SHORTCUT METADATA THAT MIGHT NEED TO CHANGE
# BUT CAN'T USE UUID IF NEW... HENCE PROBLEM
old_object = get_ENCODE(object_id)

# if object is not found, verify and post it
if old_object.get(u'title') == u'Not Found':
# if object is not found, verify and post it
if old_object.get(u'title') == u'Not Found':

# get relevant schema
object_schema = get_ENCODE(('/profiles/' + object_type + '.json'))
# get relevant schema
object_schema = get_ENCODE(('/profiles/' + object_type + '.json'))

# test the new object. SHOULD HANDLE ERRORS GRACEFULLY
try:
jsonschema.validate(new_object,object_schema)
# did not validate
except Exception as e:
print('Validation of ' + object_id + ' failed.')
print(e)

# did validate
else:
# inform the user of the success
print('Validation of ' + object_id + ' succeeded.')

# post the new object(s). SHOULD HANDLE ERRORS GRACEFULLY
response = new_ENCODE('/users/',new_object)
# test the new object. SHOULD HANDLE ERRORS GRACEFULLY
try:
jsonschema.validate(new_object,object_schema)
# did not validate
except Exception as e:
print('Validation of ' + object_id + ' failed.')
print(e)

# did validate
else:
# inform the user of the success
print('Validation of ' + object_id + ' succeeded.')

# if object is found, check for differences and patch it if needed.
else:
# post the new object(s). SHOULD HANDLE ERRORS GRACEFULLY
response = new_ENCODE(object_collection,new_object)

# compare new object to old one, remove identical fields.
for key in new_object.keys():
if new_object.get(key) == old_object.get(key):
new_object.pop(key)

# if there are any different fields, patch them. SHOULD ALLOW FOR USER TO VIEW/APPROVE DIFFERENCES
if new_object:

# inform user of the updates
print(object_id + ' has updates.')
print(new_object)

# patch object
response = patch_ENCODE(object_id, new_object)

# inform user there are no updates
# if object is found, check for differences and patch it if needed.
else:
print(object_id + ' has no updates.')


# compare new object to old one, remove identical fields.
for key in new_object.keys():
if new_object.get(key) == old_object.get(key):
new_object.pop(key)

# if there are any different fields, patch them. SHOULD ALLOW FOR USER TO VIEW/APPROVE DIFFERENCES
if new_object:

# inform user of the updates
print(object_id + ' has updates.')
print(new_object)

# patch object
response = patch_ENCODE(object_id, new_object)

# inform user there are no updates
else:
print(object_id + ' has no updates.')


Loading

0 comments on commit 23e60ec

Please sign in to comment.