-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added directory_to_library and library_permissions scripts to master …
…branch.
- Loading branch information
Showing
2 changed files
with
536 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,325 @@ | ||
''' | ||
Script to make data library of local file/directory structure. | ||
usage: directory_to_library.py [-h] [-u URL] [-k KEY] [-n NAME] [-v] | ||
[-t [FILETYPES [FILETYPES ...]]] [-e] | ||
[-a [ALLOW_USERS [ALLOW_USERS ...]]] | ||
directory | ||
Make a galaxy data library from a file/directory structure. | ||
positional arguments: | ||
directory the directory to make a data library from | ||
optional arguments: | ||
-h, --help show this help message and exit | ||
-u URL, --url URL the Galaxy URL | ||
-k KEY, --key KEY the Galaxy API key to use (overrides default) | ||
-n NAME, --name NAME the name of the data library to create (overrides | ||
default). Using an existing data library name will | ||
update the existing library. | ||
-v, --verbose Print out debugging information | ||
-t [FILETYPES [FILETYPES ...]], --filetypes [FILETYPES [FILETYPES ...]] | ||
A space-seperated list of filetypes to include in the | ||
data library. Defaults to fna, faa, ffn, gbk, gff | ||
-e, --exclude Exclude the file types specified in -t. Defaults to | ||
excluding fna, faa, ffn, gbk, gff | ||
-a [ALLOW_USERS [ALLOW_USERS ...]], --allow_users [ALLOW_USERS [ALLOW_USERS ...]] | ||
A space-seperated list of emails of users to allow | ||
access to the data library. Defaults to None- a public | ||
library. | ||
Needs an API key in GALAXY_KEY unless specified via command line | ||
Assumes Galaxy instance exists at localhost unless otherwise specified. | ||
''' | ||
|
||
from __future__ import print_function | ||
from bioblend.galaxy import GalaxyInstance | ||
|
||
import argparse | ||
import os | ||
import sys | ||
|
||
|
||
def printerr(*args): | ||
''' | ||
Function for printing to stderr. | ||
:param args: The contents to be printed to stderr. | ||
:return: None. | ||
''' | ||
|
||
print(*args, file=sys.stderr) | ||
|
||
|
||
def getFilesInLibrary(contents): | ||
''' | ||
Function for getting a list of all files in a library. | ||
:param contents: The contents of the library - can be obtained with show_library(lib['id'], contents=True). | ||
:return: A list of file names, including their directory path (strings) within library. | ||
''' | ||
|
||
return [item['name'] for item in contents if item['type'] == 'file'] | ||
|
||
|
||
def getFilesToInclude(filepath, file_types, exclude=False): | ||
''' | ||
Function for getting a list of all files of a given type (or the inverse). | ||
:param filepath: The path of the folder containing the files. | ||
:param file_types: A list of file types you wish to include/exclude. | ||
:param include: True if you want to get files matching those in fileTypes, | ||
False if you want to exclude files matching those in fileTypes. | ||
:return: A list of file names (strings) within folder | ||
''' | ||
|
||
# Get all files and folders in the specified directory | ||
dirlist = [] | ||
for root, dirs, files in os.walk(filepath): | ||
for name in files: | ||
dirlist.append(os.path.join(root, name).replace(filepath, "")) | ||
|
||
# By default, we check if the filetype is in the filetypes. | ||
compareFunc=lambda ftype,ftypes: ftype.endswith(tuple(ftypes)) | ||
if exclude: # Overriding default behaviour | ||
compareFunc=lambda ftype,ftypes: not ftype.endswith(tuple(ftypes)) | ||
|
||
files_to_include = [] | ||
for fileName in dirlist: | ||
# Compare the file extension with the fileTypes list. | ||
if compareFunc(fileName, file_types): | ||
if fileName.split("/")[-1][0] != ".": # Don't include hidden files. | ||
files_to_include.append(fileName) | ||
|
||
return files_to_include | ||
|
||
def makeDirectoryOrFile(gi, lib, galaxy_parent_dir, local_parent_dir, filepath, dir_index, galaxy_url, verbose): | ||
""" | ||
Recursive function for traversing a filepath, and at each step, make either a directory or a file in a | ||
galaxy data library. Allows us to copy a whole directory structure in a galaxy data library. | ||
:param gi: Galaxy instance object | ||
:param lib: The Galaxy library object, representing the library to create the directory structure in | ||
:param galaxy_parent_dir: The galaxy directory object of the parent dir of our current traversal location. | ||
:param local_parent_dir: The local filepath that preceeds the top-level directory of the structure we're creating. | ||
:param filepath: The filepath to traverse, as a list (e.g. ['refseq', 'salmonella', 'ABC.gbk']) | ||
:param dir_index: The index of the filepath that we're currently looking at | ||
:param galaxy_url: The URL of the galaxy instance | ||
:param verbose: True if we're outputting debugging info. | ||
:return: None | ||
""" | ||
|
||
current_filepath = filepathToString(filepath[:dir_index + 1]) | ||
lib_dirs = [d['name'] for d in gi.libraries.get_folders(lib['id'])] | ||
if dir_index == len(filepath)-1: | ||
makeFile(gi, lib, galaxy_parent_dir, local_parent_dir, filepath, galaxy_url, verbose) | ||
else: | ||
# Check if folder exists, get required info if it does, otherwise create it | ||
if current_filepath in lib_dirs: | ||
if verbose: print("Directory exists: " + current_filepath) | ||
|
||
# Get directory information | ||
galaxy_folder = gi.libraries.get_folders(lib['id'], name=current_filepath)[0] | ||
else: | ||
if verbose: print("Adding directory to library - " + current_filepath) | ||
galaxy_folder = gi.libraries.create_folder(lib['id'], | ||
current_filepath.split("/")[-1], | ||
base_folder_id=galaxy_parent_dir['id'])[0] | ||
|
||
dir_index += 1 | ||
makeDirectoryOrFile(gi, lib, galaxy_folder, local_parent_dir, filepath, dir_index, galaxy_url, verbose) | ||
|
||
|
||
def makeFile(gi, lib, galaxy_parent_dir, local_parent_dir, filepath, galaxy_url, verbose): | ||
""" | ||
Function to add a file to a galaxy data library. | ||
If the Galaxy instance is local, it will make a symlink instead of uploading. | ||
:param gi: Galaxy instance object | ||
:param lib: The Galaxy library object, representing the library to create the directory structure in | ||
:param galaxy_parent_dir: The galaxy directory object of the parent dir of the file. | ||
:param local_parent_dir: The local filepath that preceeds the filepath param below. | ||
:param filepath: The filepath to traverse, as a list (e.g. ['refseq', 'salmonella', 'ABC.gbk']) | ||
:param galaxy_url: The URL of the galaxy instance | ||
:param verbose: True if we're outputting debugging info. | ||
:return: None | ||
""" | ||
|
||
filename = filepath[-1] | ||
# If file doesn't exist, add it | ||
if filepathToString(filepath) not in getFilesInLibrary(gi.libraries.show_library(lib['id'], contents=True)): | ||
if verbose: print("Adding file - " + filepathToString(filepath)) | ||
|
||
filetype = 'auto' | ||
|
||
simon_filename, simon_file_extension = os.path.splitext(filename) | ||
print ("filename " + filename + " filetype " + simon_file_extension) | ||
if simon_file_extension == '.fq' or simon_file_extension == '.fastq': | ||
filetype = 'fastqsanger' | ||
|
||
if "127.0.0.1" in galaxy_url or "localhost" in galaxy_url: | ||
# Local Galaxy server - create a symbolic link instead of a copy | ||
gi.libraries.upload_from_galaxy_filesystem( | ||
library_id=lib['id'], | ||
filesystem_paths=local_parent_dir + filepathToString(filepath), | ||
folder_id=galaxy_parent_dir['id'], | ||
file_type=filetype, | ||
link_data_only="link_to_files") | ||
else: | ||
# Remote Galaxy server - copy files from local machine | ||
gi.libraries.upload_file_from_local_path( | ||
library_id=lib['id'], | ||
file_local_path=local_parent_dir + filepathToString(filepath), | ||
folder_id=galaxy_parent_dir['id']) | ||
else: | ||
if verbose: print("File exists - " + filename) | ||
|
||
def filepathToString(filepath): | ||
""" | ||
Turn a list of a filepath into a string. | ||
e.g. turn ['refseq', 'salmonella', 'abc.gbk'] into '/refseq/salmonella/abc.gbk' | ||
:param filepath: The filepath as a list | ||
:return: The string representing the filepath. | ||
""" | ||
|
||
return "/"+"/".join(filepath) | ||
|
||
def getUserIDFromEmail(email, all_users): | ||
""" | ||
Function to get the Galaxy user ID given a user's email. | ||
:param email: The email of the user to get the ID for | ||
:param all_users: All galaxy users, obtained from gi.roles.get_roles() | ||
:return: The user ID if user exists, or None | ||
""" | ||
match = next((user for user in all_users if user['name'].lower() == email.strip().lower()), None) | ||
if match: | ||
return match['id'] | ||
return None | ||
|
||
def getLibraryPermissions(gi, lib): | ||
""" | ||
Function to get the existing galaxy data library permissions, and only return user ID's (no emails). | ||
:param gi: The galaxy instance object | ||
:param lib: The galaxy library object | ||
:return: A dictionary containing key:val pairs of data library permission: list of user IDs | ||
""" | ||
permissions = gi.libraries.get_library_permissions(lib['id']) | ||
for key, val in permissions.items(): | ||
user_ids = [] | ||
for item in val: | ||
# Email at index 0, ID at index 1. | ||
user_ids.append(item[1]) | ||
permissions[key] = user_ids | ||
return permissions | ||
|
||
def main(): | ||
# Default values. | ||
galaxy_url = 'http://127.0.0.1:8080/galaxy/' | ||
galaxy_key = '' | ||
file_types=['fna', 'faa', 'ffn', 'gbk', 'gff', 'fq', 'fasta', 'fa'] | ||
|
||
# Get things like API Key, RefSeq directory and genus from command line. | ||
parser = argparse.ArgumentParser(description='Make a galaxy data library from a file/directory structure.') | ||
|
||
parser.add_argument('directory', type=str, help='the directory to make a data library from') | ||
parser.add_argument('-u', '--url', type=str, help='the Galaxy URL', default=galaxy_url) | ||
parser.add_argument('-k', '--key', type=str, help='the Galaxy API key to use (overrides default)', default=galaxy_key) | ||
parser.add_argument('-n', '--name', type=str, help='the name of the data library to create (overrides default). Using an existing data library name will update the existing library.') | ||
parser.add_argument('-v', '--verbose', action="store_true", help='Print out debugging information') | ||
parser.add_argument('-t', '--filetypes', nargs='*', help='A space-seperated list of filetypes to include in the data library. Defaults to fna, faa, ffn, gbk, gff', default=file_types) | ||
parser.add_argument('-e', '--exclude', action='store_true', help='Exclude the file types specified in -t. Defaults to excluding fna, faa, ffn, gbk, gff') | ||
parser.add_argument('-a', '--allow_users', nargs='*', help='A space-seperated list of emails of users to allow access to the data library. For existing libraries, these users will be appended to the existing permissions list.', default=[]) | ||
|
||
# Parse args. | ||
args = parser.parse_args() | ||
|
||
# Renaming for readability. | ||
local_directory = args.directory | ||
galaxy_url = args.url | ||
galaxy_key = args.key | ||
file_types = args.filetypes | ||
possible_lib_name = args.name | ||
allow_users = args.allow_users | ||
|
||
|
||
# Ensure the local directory and Galaxy URL end in a / to avoid errors later. | ||
if local_directory[-1] != "/": local_directory += "/" | ||
if galaxy_url[-1] != "/": galaxy_url += "/" | ||
|
||
|
||
# Print out debugging info. | ||
if args.verbose: | ||
print("Local directory: " + local_directory) | ||
print("Galaxy URL: " + galaxy_url) | ||
print("Galaxy key: " + galaxy_key) | ||
print("File types: " + str(file_types)) | ||
print("Exclude: " + str(args.exclude)) | ||
print("Users: " + str(allow_users)) | ||
|
||
# Check the RefSeq directory exists, exit if we can't find it. | ||
if not os.path.isdir(local_directory): | ||
printerr("ERROR: The directory could not be found at " + local_directory) | ||
sys.exit(1) | ||
|
||
# Initiating Galaxy connection. | ||
if args.verbose: print("Connecting to Galaxy") | ||
gi = GalaxyInstance(url=galaxy_url, key=galaxy_key) | ||
|
||
# Get list of existing libraries. | ||
libraries = gi.libraries.get_libraries(deleted=False) | ||
|
||
# Set the library name if it's not set. | ||
if not possible_lib_name: | ||
possible_lib_name = [x.strip() for x in local_directory.split("/") if x][-1] | ||
|
||
if args.verbose: print("Library name: " + possible_lib_name) | ||
|
||
# Get existing library info if it does exist, if it doesn't exist create library. | ||
if possible_lib_name in [lib['name'] for lib in libraries if not lib['deleted']]: | ||
if args.verbose: print("Library already exists - checking it is up to date") | ||
|
||
# Get library - assumes there is only one library of that name. | ||
lib = gi.libraries.get_libraries(name=possible_lib_name, deleted=False)[0] | ||
else: | ||
if args.verbose: print("Library doesn't exist - adding new library") | ||
lib = gi.libraries.create_library(possible_lib_name, | ||
"Data library created from directory: " + possible_lib_name) | ||
|
||
# Set user permissions to View only. | ||
if allow_users: | ||
if args.verbose: print("Adding user permissions") | ||
# Get the current permissions, as we want to keep all other non-view permissions the same. | ||
current_permissions = getLibraryPermissions(gi, lib) | ||
all_users = gi.roles.get_roles() | ||
user_ids = [] | ||
for user in allow_users: | ||
user_id = getUserIDFromEmail(user, all_users) | ||
if user_id: | ||
if args.verbose: print("User " + user + " given read access to library") | ||
user_ids.append(user_id) | ||
else: | ||
print("WARNING: User " + user + " not found.") | ||
|
||
gi.libraries.set_library_permissions(lib['id'], | ||
access_in=current_permissions["access_library_role_list"] + user_ids, | ||
modify_in=current_permissions["modify_library_role_list"], | ||
add_in=current_permissions["add_library_item_role_list"], | ||
manage_in=current_permissions["manage_library_role_list"]) | ||
|
||
# Get list of files and directories to include. | ||
filepaths_to_include = getFilesToInclude(local_directory, file_types, args.exclude) | ||
|
||
# For each species specified, go through each folder and add appropriate files | ||
galaxy_parent_dir = gi.libraries.get_folders(lib['id'], name="/")[0] | ||
|
||
# Add each file and directory. | ||
for filepath in filepaths_to_include: | ||
makeDirectoryOrFile(gi, lib, galaxy_parent_dir, local_directory, filepath.split("/"), 0, | ||
galaxy_url, args.verbose) | ||
|
||
if __name__ == "__main__": | ||
main() |
Oops, something went wrong.