import os
import sys
import csv
import json
from base64 import b64encode

# Third-party packages.  pyelasticsearch, xlrd and xlwt are not used by any
# helper in this module, so a missing package must not make the whole module
# unimportable.  requests and jsonschema are needed only by the HTTP and
# validation helpers; if one is missing, those helpers fail with NameError on
# first use while the stdlib-only helpers (KeyENCODE, ReadJSON, WriteJSON)
# keep working.
try:
    import jsonschema
except ImportError:
    pass
try:
    import requests
except ImportError:
    pass
try:
    from pyelasticsearch import ElasticSearch
except ImportError:
    pass
try:
    import xlrd
except ImportError:
    pass
try:
    import xlwt
except ImportError:
    pass

# Headers sent with every request; declares request bodies as JSON.
HEADERS = {'content-type': 'application/json'}


def _resolve_keys(keys):
    '''Return the credentials dict a request helper should use.

    An explicit ``keys`` argument wins.  Otherwise fall back to the names the
    original helpers read from module scope: a global ``keys`` dict, or the
    ``SERVER``/``AUTHID``/``AUTHPW`` globals.  Nothing in this module defines
    those names, so they exist only if a caller injected them.

    Raises ValueError when no credentials can be found.
    '''
    if keys is not None:
        return keys
    injected = globals()
    if 'keys' in injected:
        return injected['keys']
    if all(name in injected for name in ('SERVER', 'AUTHID', 'AUTHPW')):
        return {
            'server': injected['SERVER'],
            'authid': injected['AUTHID'],
            'authpw': injected['AUTHPW'],
        }
    raise ValueError(
        'No ENCODE credentials available: pass keys=KeyENCODE(key_file, server_name)'
    )


def _auth(keys):
    '''Return the (id, password) tuple handed to requests as ``auth=``.'''
    # KeyENCODE stores the password under 'authpw'.  'pw' is the key the
    # original patch_ENCODE looked up, so it is still accepted as a fallback.
    password = keys['authpw'] if 'authpw' in keys else keys['pw']
    return (keys['authid'], password)


def _report_unless(response, expected_status):
    '''Write the response body to stderr unless the status is the expected one.'''
    if response.status_code != expected_status:
        sys.stderr.write(response.text + '\n')


# get object from server
def get_ENCODE(obj_id, keys):
    '''GET an ENCODE object as JSON and return as dict.

    obj_id -- server-relative path of the object, e.g. '/profiles/x.json'
    keys   -- credentials dict as returned by KeyENCODE

    A non-200 response is reported on stderr; its JSON body is still returned.
    '''
    url = keys['server'] + obj_id + '?limit=all'
    response = requests.get(url, auth=_auth(keys), headers=HEADERS)
    _report_unless(response, 200)
    return response.json()


# get object from server
def GetENCODE(object_id, keys):
    '''GET an ENCODE object as JSON and return as dict.

    Returns None when object_id is not a str or when the request itself fails
    (the failure is printed).  The response status is not checked.
    '''
    if not isinstance(object_id, str):
        return None
    url = keys['server'] + object_id + '?limit=all'
    print(url)
    try:
        response = requests.get(url, auth=_auth(keys), headers=HEADERS)
    except requests.exceptions.RequestException as error:
        # Only transport-level failures are tolerated here; show the cause
        # instead of hiding it.
        print("Get request failed:")
        print(error)
        return None
    return response.json()


# patch object to server
def patch_ENCODE(obj_id, patch_json, keys=None):
    '''PATCH an existing ENCODE object and return the response JSON.

    obj_id     -- server-relative path of the object to patch
    patch_json -- dict of the fields to change
    keys       -- credentials dict from KeyENCODE; when omitted, credentials
                  injected into this module's globals are used (see
                  _resolve_keys), otherwise ValueError is raised
    '''
    keys = _resolve_keys(keys)
    url = keys['server'] + obj_id
    json_payload = json.dumps(patch_json)
    # The body is JSON, so send the same content-type header as GET and POST.
    response = requests.patch(url, auth=_auth(keys), headers=HEADERS, data=json_payload)
    print("Patch:")
    print(response.status_code)
    _report_unless(response, 200)
    return response.json()


# post object to server
def new_ENCODE(collection_id, object_json, keys=None):
    '''POST an ENCODE object as JSON and return the response JSON.

    collection_id -- collection name, with or without surrounding slashes
    object_json   -- dict describing the new object
    keys          -- credentials dict from KeyENCODE; when omitted, credentials
                     injected into this module's globals are used (see
                     _resolve_keys), otherwise ValueError is raised
    '''
    keys = _resolve_keys(keys)
    # Normalise slashes so both 'users' and '/users/' give <server>/users/.
    url = keys['server'].rstrip('/') + '/' + collection_id.strip('/') + '/'
    json_payload = json.dumps(object_json)
    response = requests.post(url, auth=_auth(keys), headers=HEADERS, data=json_payload)
    _report_unless(response, 201)
    return response.json()


# get keys from file
def KeyENCODE(key_file, server_name):
    '''Read the credentials for server_name from a tab-delimited key file.

    The file needs the columns Server, User, ID and PW.  Returns a dict with
    the keys 'user', 'server' (built as http://<Server>.encodedcc.org),
    'authid' and 'authpw'.  If several rows match, the last one wins.

    Raises ValueError when no row matches server_name.
    '''
    key_info = None
    with open(key_file) as key_open:
        for row in csv.DictReader(key_open, delimiter='\t'):
            if row.get('Server') == server_name:
                key_info = {
                    'user': row.get('User'),
                    'server': 'http://' + row.get('Server') + '.encodedcc.org',
                    'authid': row.get('ID'),
                    'authpw': row.get('PW'),
                }
    if key_info is None:
        raise ValueError(
            'No entry for server ' + repr(server_name) + ' in ' + str(key_file)
        )
    return key_info


# read json objects from file
def ReadJSON(json_file):
    '''Load and return the JSON document stored in json_file.'''
    with open(json_file) as json_load:
        return json.load(json_load)


# write new json obect. SHOULD BE MODIFIED TO CUSTOM OUTPUT FORMAT (FOR HUMAN VIEWING)
def WriteJSON(new_object, object_file):
    '''Serialise new_object as JSON into object_file, replacing its contents.'''
    with open(object_file, 'w') as outfile:
        json.dump(new_object, outfile)


# check json object for validity. SHOULD ONLY NEED OBJECT. NEED DEF TO EXTRACT VALUE (LIKE TYPE) FROM JSON OBJECT GRACEFULLY.
def ValidJSON(object_type, object_id, new_object, keys=None):
    '''Validate new_object against the server's schema for object_type.

    The schema is fetched from /profiles/<object_type>.json.  object_id is
    used only in the printed messages.  keys is the credentials dict from
    KeyENCODE; when omitted, credentials injected into this module's globals
    are used (see _resolve_keys).

    Returns True when the object validates, False (after printing the reason)
    when it does not.
    '''
    # get the relevant schema
    object_schema = get_ENCODE('/profiles/' + object_type + '.json', _resolve_keys(keys))

    try:
        jsonschema.validate(new_object, object_schema)
    except (jsonschema.ValidationError, jsonschema.SchemaError) as error:
        # did not validate
        print('Validation of ' + object_id + ' failed.')
        print(error)
        return False

    # did validate: inform the user of the success
    print('Validation of ' + object_id + ' succeeded.')
    return True
# Text type produced by json.load: unicode on Python 2, str on Python 3.
try:
    _TEXT_TYPES = (unicode,)
except NameError:
    _TEXT_TYPES = (str,)


# intended to fix invalid JSON by dropping one offending field
def CleanJSON(object_type, object_id, new_object):
    '''Try to make new_object valid by removing a single field.

    Each field is removed in turn and the remainder is checked with ValidJSON.
    The first removal that validates is kept and True is returned.  A field
    whose removal does not help is put back.  Returns False when no single
    removal makes the object valid.  new_object is modified in place.
    '''
    # Iterate over a snapshot: the dict is modified inside the loop.
    for key, value in list(new_object.items()):
        new_object.pop(key)
        if ValidJSON(object_type, object_id, new_object):
            return True
        new_object[key] = value
    return False


# flatten embedded json objects to their ID
def FlatJSON(json_object, keys):
    '''Replace every embedded object in json_object by its '@id' string.

    Linked objects are first expanded with EmbedJSON, then each dict value
    that carries an '@id' is collapsed to that id.  Dict values without an
    '@id' are left untouched.  json_object is modified in place and returned.
    '''
    json_object = EmbedJSON(json_object, keys)
    for key, value in json_object.items():
        if isinstance(value, dict) and u'@id' in value:
            json_object[key] = value[u'@id']
    return json_object


# expand json object
def EmbedJSON(json_object, keys):
    '''Replace every link value in json_object by the object it points to.

    A link is a text value starting with '/'.  It is fetched with GetENCODE
    and, when that returns a dict, stored in place of the link.  Only the top
    level is expanded.  json_object is modified in place and returned.
    '''
    for key, value in json_object.items():
        # startswith() also copes with empty strings, unlike value[0].
        if isinstance(value, _TEXT_TYPES) and value.startswith('/'):
            json_sub_object = GetENCODE(str(value), keys)
            if isinstance(json_sub_object, dict):
                json_object[key] = json_sub_object
    return json_object
MODIFY TO HAVE USER CHOOSE SERVER (ENUM LIST FROM THE FILE) - server_name = 'submit-dev' + server_name = 'staging' # get ID, PW. MODIFY TO USE USERNAME/PASS TO GAIN ACCESS TO CREDENTIALS key_file = open('keys.txt') @@ -69,75 +74,74 @@ def WriteJSON(new_object,object_file): # let user know the server/user that is set for running script print(USER + ' will be running this update on ' + SERVER) + #print(AUTHID,AUTHPW) # load objects in object folder. MODIFY TO HAVE USER VIEW AND SELECT OBJECTS object_filenames = os.listdir('objects/') # run for each object in objects folder for object_filename in object_filenames: + if '.json' in object_filename: - # define object parameters. SHOULD NOT RELY ON FILENAME. NEED WAY TO IDENTIFY OBJECT TYPE/NAME BY REVIEWING DATA - object_type,object_name = object_filename.strip('.json').split(';') - object_file = ('objects/' + object_type + ';' + object_name + '.json') - object_id = ('/' + object_type + 's/' + object_name + '/') + # define object parameters. SHOULD NOT RELY ON FILENAME. NEED WAY TO IDENTIFY OBJECT TYPE/NAME BY REVIEWING DATA + object_type,object_name = object_filename.strip('.json').split(';') + object_file = ('objects/' + object_type + ';' + object_name + '.json') + object_collection = (object_type.replace('_','-') + 's') + object_id = ('/' + object_collection + '/' + object_name + '/') - # load object - json_object = open(object_file) - new_object = json.load(json_object) - json_object.close() + # load object SHOULD HANDLE ERRORS GRACEFULLY + json_object = open(object_file) + new_object = json.load(json_object) + json_object.close() - # check to see if object already exists - # PROBLEM: SHOULD CHECK UUID AND NOT USE ANY SHORTCUT METADATA THAT MIGHT NEED TO CHANGE - # BUT CAN'T USE UUID IF NEW... HENCE PROBLEM - old_object = get_ENCODE(object_id) + # check to see if object already exists + # PROBLEM: SHOULD CHECK UUID AND NOT USE ANY SHORTCUT METADATA THAT MIGHT NEED TO CHANGE + # BUT CAN'T USE UUID IF NEW... 
HENCE PROBLEM + old_object = get_ENCODE(object_id) - # if object is not found, verify and post it - if old_object.get(u'title') == u'Not Found': + # if object is not found, verify and post it + if old_object.get(u'title') == u'Not Found': - # get relevant schema - object_schema = get_ENCODE(('/profiles/' + object_type + '.json')) + # get relevant schema + object_schema = get_ENCODE(('/profiles/' + object_type + '.json')) - # test the new object. SHOULD HANDLE ERRORS GRACEFULLY - try: - jsonschema.validate(new_object,object_schema) - # did not validate - except Exception as e: - print('Validation of ' + object_id + ' failed.') - print(e) - - # did validate - else: - # inform the user of the success - print('Validation of ' + object_id + ' succeeded.') - - # post the new object(s). SHOULD HANDLE ERRORS GRACEFULLY - response = new_ENCODE('/users/',new_object) + # test the new object. SHOULD HANDLE ERRORS GRACEFULLY + try: + jsonschema.validate(new_object,object_schema) + # did not validate + except Exception as e: + print('Validation of ' + object_id + ' failed.') + print(e) + # did validate + else: + # inform the user of the success + print('Validation of ' + object_id + ' succeeded.') - # if object is found, check for differences and patch it if needed. - else: + # post the new object(s). SHOULD HANDLE ERRORS GRACEFULLY + response = new_ENCODE(object_collection,new_object) - # compare new object to old one, remove identical fields. - for key in new_object.keys(): - if new_object.get(key) == old_object.get(key): - new_object.pop(key) - - # if there are any different fields, patch them. SHOULD ALLOW FOR USER TO VIEW/APPROVE DIFFERENCES - if new_object: - - # inform user of the updates - print(object_id + ' has updates.') - print(new_object) - - # patch object - response = patch_ENCODE(object_id, new_object) - # inform user there are no updates + # if object is found, check for differences and patch it if needed. 
else: - print(object_id + ' has no updates.') - + # compare new object to old one, remove identical fields. + for key in new_object.keys(): + if new_object.get(key) == old_object.get(key): + new_object.pop(key) + # if there are any different fields, patch them. SHOULD ALLOW FOR USER TO VIEW/APPROVE DIFFERENCES + if new_object: + + # inform user of the updates + print(object_id + ' has updates.') + print(new_object) + + # patch object + response = patch_ENCODE(object_id, new_object) + # inform user there are no updates + else: + print(object_id + ' has no updates.') diff --git a/update2.py b/update2.py new file mode 100644 index 0000000..c8be859 --- /dev/null +++ b/update2.py @@ -0,0 +1,156 @@ +import sys +import os +import csv +import json +import jsonschema +import requests +from pyelasticsearch import ElasticSearch +import xlrd +import xlwt +from base64 import b64encode + +# ENCODE Tools functions +sys.path.append('/Users/Drew/Google Drive/Scripts/ENCODE-DCC/submission_sample_scripts/dte') +from ENCODETools import get_ENCODE +from ENCODETools import patch_ENCODE +from ENCODETools import new_ENCODE +from ENCODETools import GetENCODE +from ENCODETools import KeyENCODE +from ENCODETools import ReadJSON +from ENCODETools import WriteJSON +from ENCODETools import ValidJSON +from ENCODETools import CleanJSON +from ENCODETools import FlatJSON +from ENCODETools import EmbedJSON + +# set headers. UNCLEAR IF THIS IS USED PROPERLY +HEADERS = {'content-type': 'application/json'} + + +if __name__ == "__main__": + ''' + This script will read in all objects in the objects folder, determine if they are different from the database object, and post or patch them to the database. + Authentication is determined from the keys.txt file. + ''' + # FUTURE: Should also be deal with errors that are only dependency based. + + # set server name. MODIFY TO HAVE USER CHOOSE SERVER (ENUM LIST FROM THE FILE) + server_name = 'test' + + # set data file. 
MODIFY TO HAVE USER CHOOSE SERVER (ENUM LIST FROM THE FILE) + data_file = 'update.json' + + # get ID, PW. MODIFY TO USE USERNAME/PASS TO GAIN ACCESS TO CREDENTIALS + key_file = 'keys.txt' + keys = KeyENCODE(key_file,server_name) + + # let user know the server/user that is set for running script + print(keys['user'] + ' will be running this update on ' + keys['server']) + #print(AUTHID,AUTHPW) + + # load objects in object folder. MODIFY TO HAVE USER VIEW AND SELECT OBJECTS + #object_filenames = os.listdir('objects/') + + # run for each object in objects folder + #for object_filename in object_filenames: + #if '.json' in object_filename: + + # load object SHOULD HANDLE ERRORS GRACEFULLY + print('Opening ' + data_file) + json_object = ReadJSON('objects/' + data_file) + + # if the returned json object is not a list, put it in one + if type(json_object) is dict: + object_list = [] + object_list.append(json_object) + elif type(json_object) is list: + object_list = json_object + + for new_object in object_list: + + new_object = FlatJSON(new_object) + + # define object parameters. NEEDS TO RUN A CHECK TO CONFIRM THESE EXIST FIRST. + object_type = str(new_object[u'@type'][0]) + object_id = str(new_object[u'@id']) + object_uuid = str(new_object[u'uuid']) + object_name = str(new_object[u'accession']) + + # check to see if object already exists + # PROBLEM: SHOULD CHECK UUID AND NOT USE ANY SHORTCUT METADATA THAT MIGHT NEED TO CHANGE + # BUT CAN'T USE UUID IF NEW... HENCE PROBLEM + old_object = FlatJSON(get_ENCODE(object_id)) + +# # test the validity of new object +# if not ValidJSON(object_type,object_id,new_object): +# # get relevant schema +# object_schema = get_ENCODE(('/profiles/' + object_type + '.json')) +# +# # test the new object. 
SHOULD HANDLE ERRORS GRACEFULLY +# try: +# jsonschema.validate(new_object,object_schema) +# # did not validate +# except Exception as e: +# print('Validation of ' + object_id + ' failed.') +# print(e) +# +# # did validate +# else: +# # inform the user of the success +# print('Validation of ' + object_id + ' succeeded.') +# +# # post the new object(s). SHOULD HANDLE ERRORS GRACEFULLY +# response = new_ENCODE(object_collection,new_object) + + + # if object is not found, verify and post it + if old_object.get(u'title') == u'Not Found': + + # get relevant schema + object_schema = get_ENCODE(('/profiles/' + object_type + '.json')) + + # test the new object. SHOULD HANDLE ERRORS GRACEFULLY + try: + jsonschema.validate(new_object,object_schema) + # did not validate + except Exception as e: + print('Validation of ' + object_id + ' failed.') + print(e) + + # did validate + else: + # inform the user of the success + print('Validation of ' + object_id + ' succeeded.') + + # post the new object(s). SHOULD HANDLE ERRORS GRACEFULLY + response = new_ENCODE(object_collection,new_object) + + + # if object is found, check for differences and patch it if needed. + else: + + # compare new object to old one, remove identical fields. + for key in new_object.keys(): + if new_object.get(key) == old_object.get(key): + new_object.pop(key) + elif not old_object.get(key): + new_object.pop(key) + + # if there are any different fields, patch them. SHOULD ALLOW FOR USER TO VIEW/APPROVE DIFFERENCES + if new_object: + + # inform user of the updates + print(object_id + ' has updates.') + print(new_object) + + # patch each field to object individually + for key,value in new_object.items(): + patch_single = {} + patch_single[key] = value + response = patch_ENCODE(object_id, patch_single) + + # inform user there are no updates + else: + print(object_id + ' has no updates.') + +