Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/reorganize dewrangle #6

Merged
merged 7 commits into from
Apr 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 42 additions & 18 deletions d3b_dff_cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,50 +5,47 @@
from .modules.validation.check_readgroup import main as check_readgroup
from .modules.validation.check_url import main as check_url
from .modules.dewrangle.volume import main as hash_volume
from .modules.dewrangle.list_jobs import main as list_jobs
from .modules.dewrangle.download_job import main as download_dewrangle_job


def add_dewrangle_arguments(my_parser):
    """
    Add standard arguments for Dewrangle subcommands.

    Input:
    - my_parser: argparse parser being added to

    Output:
    - original parser with added arguments

    Arguments added:
    - optional: -prefix, -region (default "us-east-1"), -billing, -credential
    - required: -study, -bucket
    """
    my_parser.add_argument(
        "-prefix",
        help="Optional, Path prefix. Default: None",
        default=None,
        required=False,
    )
    my_parser.add_argument(
        "-region",
        help="Optional, Bucket AWS region code. Default: us-east-1",
        default="us-east-1",
        required=False,
    )
    my_parser.add_argument(
        "-billing",
        help="Optional, billing group name. When not provided, use default billing group for organization",
        default=None,
        required=False,
    )
    # Explicit default keeps this option consistent with the other optional
    # arguments; argparse would fall back to None anyway.
    my_parser.add_argument(
        "-credential",
        help="Dewrangle AWS credential name. Default, try to find available credential.",
        default=None,
        required=False,
    )
    my_parser.add_argument(
        "-study", help="Study name, global id, or study id", required=True
    )
    my_parser.add_argument("-bucket", help="Bucket name", required=True)

    # Return the same parser object so callers can rebind it and then attach
    # their own defaults to it.
    return my_parser


def main():
Expand Down Expand Up @@ -104,14 +101,42 @@ def main():
parser_url.add_argument("urls", nargs="+", help="One or more URLs to validate")
parser_url.set_defaults(func=check_url)

# Volume Command
# Dewrangle commands
# hash: load a bucket to Dewrangle and hash it
# list_jobs: list jobs run on a bucket
# download: download the results of a job
dewrangle_parser = subparsers.add_parser("dewrangle", help="Dewrangle commands")
dewrangle_subparsers = dewrangle_parser.add_subparsers(
title="Dewrangle Subcommands", dest="dewrangle_command"
)

# volume hash subcommand
hash_parser = add_hash_arguments(dewrangle_subparsers)
# hash subcommand
hash_parser = dewrangle_subparsers.add_parser("hash", help="Hash volume in Dewrangle")
hash_parser = add_dewrangle_arguments(hash_parser)
hash_parser.set_defaults(func=hash_volume)

# list_jobs subcommand
list_parser = dewrangle_subparsers.add_parser(
"list_jobs", help="List volume jobs in Dewrangle"
)
list_parser = add_dewrangle_arguments(list_parser)
list_parser.set_defaults(func=list_jobs)

# download subcommand
dl_parser = dewrangle_subparsers.add_parser(
"download", help="Download job results from Dewrangle"
)
dl_parser.add_argument(
"-jobid",
help="Dewrangle jobid",
required=True,
)
dl_parser.add_argument(
"-outfile",
help="Output file name",
required=True,
)
dl_parser.set_defaults(func=download_dewrangle_job)

args = parser.parse_args()

Expand Down Expand Up @@ -140,6 +165,5 @@ def main():
sys.exit(2)



if __name__ == "__main__":
main()
24 changes: 24 additions & 0 deletions d3b_dff_cli/modules/dewrangle/download_job.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""Download job results from Dewrangle."""

from . import helper_functions as hf

def download_job(jobid, token=None):
    """
    Download the results of a Dewrangle job.

    Input:
    - jobid: Dewrangle job id
    - token: optional API key, passed to both the client factory and the
      download helper (default: None)

    Output:
    - whatever hf.download_job_result returns; main() in this module
      unpacks it as a (status, results) pair
    """
    gql_client = hf.create_gql_client(api_key=token)
    job_result = hf.download_job_result(jobid, client=gql_client, api_key=token)
    return job_result


def main(args):
    """
    Download a Dewrangle job's results and write them to a CSV file.

    Input:
    - args: parsed arguments; reads args.jobid and args.outfile

    The results are written with to_csv only when the job status is
    "Complete"; otherwise a message is printed and nothing is written.
    """
    job_status, results = download_job(args.jobid)

    # Guard clause: nothing to write until the job has finished.
    if job_status != "Complete":
        print("Job incomplete, please check again later.")
        return

    results.to_csv(args.outfile)
Loading