diff --git a/.goreleaser.yaml b/.goreleaser.yaml
index 04d36ab..5bd1bff 100644
--- a/.goreleaser.yaml
+++ b/.goreleaser.yaml
@@ -21,14 +21,14 @@ before:
     - go generate ./...
 builds:
-  - id: "datasetIngestor"
+  - id: "scicat-cli"
    flags:
      - -trimpath
    ldflags:
-      - -s -w -X main.VERSION={{.Version}} # This will set the VERSION variable in the binary to the github tag
+      - "-s -w -X 'github.com/paulscherrerinstitute/scicat/cmd/commands.VERSION={{.Version}}'"
    env:
      - CGO_ENABLED=0
-    dir: ./cmd/datasetIngestor/
+    dir: ./cmd/
    goos:
      - linux
      - windows
@@ -36,75 +36,7 @@ builds:
    goarch:
      - amd64
    main: .
-    binary: datasetIngestor
-
-  - id: "datasetArchiver"
-    flags:
-      - -trimpath
-    ldflags:
-      - -s -w -X main.VERSION={{.Version}}
-    env:
-      - CGO_ENABLED=0
-    dir: ./cmd/datasetArchiver/
-    goos:
-      - linux
-      - windows
-      - darwin
-    goarch:
-      - amd64
-    main: .
-    binary: datasetArchiver
-
-  - id: "datasetRetriever"
-    flags:
-      - -trimpath
-    ldflags:
-      - -s -w -X main.VERSION={{.Version}}
-    env:
-      - CGO_ENABLED=0
-    dir: ./cmd/datasetRetriever/
-    goos:
-      - linux
-      - windows
-      - darwin
-    goarch:
-      - amd64
-    main: .
-    binary: datasetRetriever
-
-  - id: "datasetCleaner"
-    flags:
-      - -trimpath
-    ldflags:
-      - -s -w -X main.VERSION={{.Version}}
-    env:
-      - CGO_ENABLED=0
-    dir: ./cmd/datasetCleaner/
-    goos:
-      - linux
-      - windows
-      - darwin
-    goarch:
-      - amd64
-    main: .
-    binary: datasetCleaner
-
-  - id: "datasetGetProposal"
-    flags:
-      - -trimpath
-    ldflags:
-      - -s -w -X main.VERSION={{.Version}}
-    env:
-      - CGO_ENABLED=0
-    dir: ./cmd/datasetGetProposal/
-    goos:
-      - linux
-      - windows
-      - darwin
-    goarch:
-      - amd64
-    main: .
-    binary: datasetGetProposal
+    binary: scicat-cli
 
 archives:
diff --git a/README.md b/README.md
index 6260814..cbd023e 100644
--- a/README.md
+++ b/README.md
@@ -2,13 +2,16 @@
 ## Building
 
-For testing, just build `main.go` for each command:
+### General Information
+
+For testing, build the CLI tool as follows:
 
 ```
-go build -o cmd/datasetIngestor/datasetIngestor cmd/datasetIngestor/main.go
+cd cmd
+go build -o scicat-cli
 ```
 
-All applications are built automatically and can be downloaded from the [Releases](https://github.com/paulscherrerinstitute/scicat-cli/releases) section of this repo.
+The CLI is built automatically and can be downloaded from the [Releases](https://github.com/paulscherrerinstitute/scicat-cli/releases) section of this repo.
 
 To build the binaries for all target architectures locally, use GoReleaser; check `.goreleaser.yaml` for the configuration. To use GoReleaser, run `goreleaser release --snapshot --clean` in your terminal. This will build the binaries, create the archives and generate the changelog. The `--snapshot` flag ensures that no publishing will happen.
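As an aside to the README changes above (not part of the patch itself): GoReleaser injects the release tag into `github.com/paulscherrerinstitute/scicat/cmd/commands.VERSION` via `-ldflags`, so a local development build can inject a version string the same way. The value `0.0.0-dev` below is an arbitrary placeholder:

```
cd cmd
go build -ldflags "-X 'github.com/paulscherrerinstitute/scicat/cmd/commands.VERSION=0.0.0-dev'" -o scicat-cli
./scicat-cli datasetIngestor --version
```

Each subcommand registers a `--version` flag that prints this value, as seen in the command files below.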
diff --git a/cmd/commands/cmd_test.go b/cmd/commands/cmd_test.go new file mode 100644 index 0000000..7556645 --- /dev/null +++ b/cmd/commands/cmd_test.go @@ -0,0 +1,387 @@ +package cmd + +import ( + "testing" + + "github.com/paulscherrerinstitute/scicat/datasetUtils" +) + +func TestMainFlags(t *testing.T) { + // test cases + tests := []struct { + name string + flags map[string]interface{} + args []string + }{ + // datasetArchiver + { + name: "datasetArchiver test without flags", + flags: map[string]interface{}{ + "testenv": false, + "devenv": false, + "localenv": false, + "noninteractive": false, + "version": false, + "user": "", + "token": "", + "tapecopies": 1, + }, + args: []string{"datasetArchiver", "an argument placeholder"}, + }, + { + name: "datasetArchiver test with all flags set", + flags: map[string]interface{}{ + "testenv": true, + "devenv": false, + "localenv": false, + "noninteractive": true, + "version": true, + "user": "usertest:passtest", + "token": "token", + "tapecopies": 6571579, + }, + args: []string{ + "datasetArchiver", + "--testenv", + //"--devenv", + //"--localenv", + "--noninteractive", + "--user", + "usertest:passtest", + "--token", + "token", + "--tapecopies", + "6571579", + "--version", + "an argument placeholder", + }, + }, + // datasetCleaner + { + name: "datasetCleaner test without flags", + flags: map[string]interface{}{ + "testenv": false, + "devenv": false, + "nonInteractive": false, + "removeFromCatalog": false, + "version": false, + "user": "", + "token": "", + }, + args: []string{"datasetCleaner", "argument placeholder"}, + }, + { + name: "datasetCleaner test with all flags set", + flags: map[string]interface{}{ + "testenv": true, + "devenv": false, + "nonInteractive": true, + "removeFromCatalog": true, + "version": true, + "user": "usertest:passtest", + "token": "token", + }, + args: []string{ + "datasetCleaner", + "--testenv", + //"--devenv", + "--nonInteractive", + "--removeFromCatalog", + "--user", + "usertest:passtest", + "--token", + "token", + "--version", + "argument placeholder", + }, + }, + // datasetGetProposal + { + name: "datasetGetProposal test without flags", + flags: map[string]interface{}{ + "testenv": false, + "devenv": false, + "version": false, + "user": "", + "token": "", + "field": "", + }, + args: []string{"datasetGetProposal", "argument placeholder"}, + }, + { + name: "datasetGetProposal test with all flags set", + flags: map[string]interface{}{ + "testenv": true, + "devenv": false, + "version": true, + "user": "usertest:passtest", + "token": "token", + "field": "some field", + }, + args: []string{ + "datasetGetProposal", + "--testenv", + //"--devenv", + "--user", + "usertest:passtest", + "--token", + "token", + "--field", + "some field", + "--version", + "argument placeholder", + }, + }, + // datasetIngestor + { + name: "datasetIngestor test without flags", + flags: map[string]interface{}{ + "ingest": false, + "testenv": false, + "devenv": false, + "localenv": false, + "tunnelenv": false, + "noninteractive": false, + "copy": false, + "nocopy": false, + "autoarchive": false, + "allowexistingsource": false, + "version": false, + "user": "", + "token": "", + "linkfiles": "keepInternalOnly", + "addattachment": "", + "addcaption": "", + "tapecopies": 0, + }, + args: []string{"datasetIngestor", "argument placeholder"}, + }, + { // note: the environment flags are mutually exclusive, not all of them can be set at once + name: "datasetIngestor test with (almost) all flags set", + flags: map[string]interface{}{ + "ingest": true, + 
"testenv": true, + "devenv": false, + "localenv": false, + "tunnelenv": false, + "noninteractive": true, + "copy": true, + "nocopy": true, + "autoarchive": true, + "allowexistingsource": true, + "version": true, + "user": "usertest:passtest", + "token": "token", + "linkfiles": "somerandomstring", + "addattachment": "random attachment string", + "addcaption": "a seemingly random caption", + "tapecopies": 6571579, + }, + args: []string{ + "datasetIngestor", + "--ingest", + "--testenv", + //"--localenv", + //"--tunnelenv", + "--noninteractive", + "--user", + "usertest:passtest", + "--token", + "token", + "--copy", + "--nocopy", + "--tapecopies", + "6571579", + "--autoarchive", + "--linkfiles", + "somerandomstring", + "--allowexistingsource", + "--addattachment", + "random attachment string", + "--addcaption", + "a seemingly random caption", + "--version", + "argument placeholder", + }, + }, + // datasetPublishData + { + name: "datasetPublishData test without flags", + flags: map[string]interface{}{ + "publish": false, + "testenv": false, + "devenv": false, + "version": false, + "publisheddata": "", + "user": "", + "token": "", + }, + args: []string{"datasetPublishData"}, + }, + { + name: "datasetPublishData test with (almost) all flags set", + flags: map[string]interface{}{ + "publish": true, + "testenv": true, + "devenv": false, + "version": true, + "publisheddata": "some data that was published", + "user": "usertest:passtest", + "token": "token", + }, + args: []string{ + "datasetPublishData", + "--publish", + "--publisheddata", + "some data that was published", + "--testenv", + "--user", + "usertest:passtest", + "--token", + "token", + "--version", + }, + }, + // datasetPublishDataRetrieve + { + name: "datasetPublishDataRetrieve test without flags", + flags: map[string]interface{}{ + "retrieve": false, + "testenv": false, + "devenv": false, + "version": false, + "user": "", + "token": "", + "publisheddata": "", + }, + args: []string{"datasetPublishDataRetrieve"}, + }, + { + name: "datasetPublishDataRetrieve test with (almost) all flags set", + flags: map[string]interface{}{ + "retrieve": true, + "testenv": true, + "devenv": false, + "version": true, + "user": "usertest:passtest", + "token": "token", + "publisheddata": "some data that was published", + }, + args: []string{ + "datasetPublishDataRetrieve", + "--retrieve", + "--testenv", + "--user", + "usertest:passtest", + "--token", + "token", + "--publisheddata", + "some data that was published", + "--version", + }, + }, + // datasetRetriever + { + name: "datasetRetriever test without flags", + flags: map[string]interface{}{ + "retrieve": false, + "nochksum": false, + "testenv": false, + "devenv": false, + "version": false, + "user": "", + "token": "", + "dataset": "", + "ownergroup": "", + }, + args: []string{"datasetRetriever", "placeholder arg"}, + }, + { + name: "datasetRetriever test with (almost) all flags set", + flags: map[string]interface{}{ + "retrieve": true, + "nochksum": true, + "testenv": true, + "devenv": false, + "version": true, + "user": "usertest:passtest", + "token": "token", + "dataset": "some dataset", + "ownergroup": "some owners", + }, + args: []string{ + "datasetRetriever", + "--retrieve", + "--nochksum", + "--testenv", + "--user", + "usertest:passtest", + "--token", + "token", + "--dataset", + "some dataset", + "--ownergroup", + "some owners", + "--version", + "placeholder arg", + }, + }, + // waitForJobFinished + { + name: "waitForJobFinished test without flags", + flags: map[string]interface{}{ + "testenv": false, 
+ "devenv": false, + "version": false, + "user": "", + "token": "", + "job": "", + }, + args: []string{"waitForJobFinished"}, + }, + { + name: "waitForJobFinsihed with (almost) all flags set", + flags: map[string]interface{}{ + "testenv": true, + "devenv": false, + "version": true, + "user": "usertest:passtest", + "token": "token", + "job": "some job to wait for", + }, + args: []string{ + "waitForJobFinished", + "--testenv", + "--user", + "usertest:passtest", + "--token", + "token", + "--job", + "some job to wait for", + "--version", + }, + }, + } + + // running test cases + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + //flag.CommandLine = flag.NewFlagSet(test.name, flag.ExitOnError) + datasetUtils.TestFlags = func(flags map[string]interface{}) { + passing := true + for flag := range test.flags { + if flags[flag] != test.flags[flag] { + t.Logf("%s's value should be \"%v\" but it's \"%v\", or non-matching type", flag, test.flags[flag], flags[flag]) + passing = false + } + } + if !passing { + t.Fail() + } + } + + rootCmd.SetArgs(test.args) + Execute() + }) + } +} diff --git a/cmd/commands/commonConstants.go b/cmd/commands/commonConstants.go new file mode 100644 index 0000000..68d1312 --- /dev/null +++ b/cmd/commands/commonConstants.go @@ -0,0 +1,18 @@ +package cmd + +const MANUAL = "http://melanie.gitpages.psi.ch/SciCatPages" + +const PROD_API_SERVER string = "https://dacat.psi.ch/api/v3" +const TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3" +const DEV_API_SERVER string = "https://dacat-development.psi.ch/api/v3" +const LOCAL_API_SERVER string = "http://localhost:3000/api/v3" +const TUNNEL_API_SERVER string = "https://dacat-development.psi.ch:5443/api/v3" + +const PROD_RSYNC_ARCHIVE_SERVER string = "pb-archive.psi.ch" +const TEST_RSYNC_ARCHIVE_SERVER string = "pbt-archive.psi.ch" +const DEV_RSYNC_ARCHIVE_SERVER string = "arematest2in.psi.ch" +const LOCAL_RSYNC_ARCHIVE_SERVER string = "localhost" +const TUNNEL_RSYNC_ARCHIVE_SERVER string = "arematest2in.psi.ch:2022" + +const PUBLISHServer string = "doi2.psi.ch" +const RETRIEVELocation string = "/data/archiveManager/retrieve/" diff --git a/cmd/commands/datasetArchiver.go b/cmd/commands/datasetArchiver.go new file mode 100644 index 0000000..d70a993 --- /dev/null +++ b/cmd/commands/datasetArchiver.go @@ -0,0 +1,146 @@ +package cmd + +import ( + "bufio" + "crypto/tls" + "fmt" + "log" + "net/http" + "os" + "strings" + "time" + + "github.com/fatih/color" + "github.com/paulscherrerinstitute/scicat/datasetUtils" + "github.com/spf13/cobra" +) + +var datasetArchiverCmd = &cobra.Command{ + Use: "datasetArchiver [options] (ownerGroup | space separated list of datasetIds)", + Short: "Archives all datasets in state datasetCreated from a given ownerGroup", + Long: `Tool to archive datasets to the data catalog. + +You must choose either an ownerGroup, in which case all archivable datasets +of this ownerGroup not yet archived will be archived. +Or you choose a (list of) datasetIds, in which case all archivable datasets +of this list not yet archived will be archived. 
+ +For further help see "` + MANUAL + `"`, + Args: cobra.MinimumNArgs(1), + Run: func(cmd *cobra.Command, args []string) { + // consts & vars + var client = &http.Client{ + Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: false}}, + Timeout: 10 * time.Second} + + const CMD = "datasetArchiver" + var scanner = bufio.NewScanner(os.Stdin) + + var APIServer string + var env string + + // pass parameters + userpass, _ := cmd.Flags().GetString("user") + token, _ := cmd.Flags().GetString("token") + tapecopies, _ := cmd.Flags().GetInt("tapecopies") + testenvFlag, _ := cmd.Flags().GetBool("testenv") + localenvFlag, _ := cmd.Flags().GetBool("localenv") + devenvFlag, _ := cmd.Flags().GetBool("devenv") + nonInteractiveFlag, _ := cmd.Flags().GetBool("noninteractive") + showVersion, _ := cmd.Flags().GetBool("version") + + if datasetUtils.TestFlags != nil { + datasetUtils.TestFlags(map[string]interface{}{ + "user": userpass, + "token": token, + "tapecopies": tapecopies, + "testenv": testenvFlag, + "localenv": localenvFlag, + "devenv": devenvFlag, + "noninteractive": nonInteractiveFlag, + "version": showVersion, + }) + return + } + + // execute command + if showVersion { + fmt.Printf("%s\n", VERSION) + return + } + + // check for program version only if running interactively + datasetUtils.CheckForNewVersion(client, CMD, VERSION) + + if testenvFlag { + APIServer = TEST_API_SERVER + env = "test" + } else if devenvFlag { + APIServer = DEV_API_SERVER + env = "dev" + } else if localenvFlag { + APIServer = LOCAL_API_SERVER + env = "local" + } else { + APIServer = PROD_API_SERVER + env = "production" + } + + color.Set(color.FgGreen) + log.Printf("You are about to archive dataset(s) to the === %s === data catalog environment...", env) + color.Unset() + + ownerGroup := "" + inputdatasetList := make([]string, 0) + + // argsWithoutProg := os.Args[1:] + if len(args) == 0 { + log.Println("invalid number of args") + return + } else if len(args) == 1 && !strings.Contains(args[0], "/") { + ownerGroup = args[0] + } else { + inputdatasetList = args[0:] + } + + auth := &datasetUtils.RealAuthenticator{} + user, _ := datasetUtils.Authenticate(auth, client, APIServer, &token, &userpass) + + archivableDatasets := datasetUtils.GetArchivableDatasets(client, APIServer, ownerGroup, inputdatasetList, user["accessToken"]) + if len(archivableDatasets) > 0 { + archive := "" + if nonInteractiveFlag { + archive = "y" + } else { + fmt.Printf("\nDo you want to archive these %v datasets (y/N) ? 
", len(archivableDatasets)) + scanner.Scan() + archive = scanner.Text() + } + if archive != "y" { + log.Fatalf("Okay the archive process is stopped here, no datasets will be archived\n") + } else { + log.Printf("You chose to archive the new datasets\n") + log.Printf("Submitting Archive Job for the ingested datasets.\n") + jobId := datasetUtils.CreateJob(client, APIServer, user, archivableDatasets, &tapecopies) + fmt.Println(jobId) + } + } else { + log.Fatalf("No archivable datasets remaining") + } + }, +} + +func init() { + rootCmd.AddCommand(datasetArchiverCmd) + + datasetArchiverCmd.Flags().String("user", "", "Defines optional username and password") + datasetArchiverCmd.Flags().String("token", "", "Defines optional API token instead of username:password") + datasetArchiverCmd.Flags().Int("tapecopies", 1, "Number of tapecopies to be used for archiving") + datasetArchiverCmd.Flags().Bool("testenv", false, "Use test environment (qa) instead or production") + datasetArchiverCmd.Flags().Bool("localenv", false, "Use local environment (local) instead or production") + datasetArchiverCmd.Flags().Bool("devenv", false, "Use development environment instead or production") + datasetArchiverCmd.Flags().Bool("noninteractive", false, "Defines if no questions will be asked, just do it - make sure you know what you are doing") + datasetArchiverCmd.Flags().Bool("version", false, "Show version number and exit") + + datasetArchiverCmd.MarkFlagsMutuallyExclusive("testenv", "localenv", "devenv") +} diff --git a/cmd/commands/datasetCleaner.go b/cmd/commands/datasetCleaner.go new file mode 100644 index 0000000..dd765e8 --- /dev/null +++ b/cmd/commands/datasetCleaner.go @@ -0,0 +1,131 @@ +package cmd + +import ( + "crypto/tls" + "fmt" + "log" + "net/http" + "time" + + "github.com/fatih/color" + "github.com/paulscherrerinstitute/scicat/datasetUtils" + "github.com/spf13/cobra" +) + +var datasetCleanerCmd = &cobra.Command{ + Use: "datasetCleaner [options] datasetPid", + Short: "Remove dataset from archive and optionally from data catalog", + Long: `Tool to remove datasets from the data catalog. + +If Datablock entries exist for a given dataset, a reset job will be launched. + +If the Dataset should be removed from the data catalog, the corresponding +documents in Dataset and OrigDatablock will be deleted as well. This will only +happen once the reset job is finished. The tool will try to remove the dataset +catalog entries each minute until Dataset is found to be in archivable state again, +and only then it will be deleted in the data catalog. + +Note: these actions can not be un-done! Be careful! 
+
+For further help see "` + MANUAL + `"`,
+	Args: cobra.ExactArgs(1),
+	Run: func(cmd *cobra.Command, args []string) {
+		// vars & consts
+		var client = &http.Client{
+			Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: false}},
+			Timeout:   10 * time.Second}
+
+		const CMD = "datasetCleaner"
+
+		var APIServer string
+		var env string
+
+		// pass parameters
+		removeFromCatalogFlag, _ := cmd.Flags().GetBool("removeFromCatalog")
+		nonInteractiveFlag, _ := cmd.Flags().GetBool("nonInteractive")
+		testenvFlag, _ := cmd.Flags().GetBool("testenv")
+		devenvFlag, _ := cmd.Flags().GetBool("devenv")
+		userpass, _ := cmd.Flags().GetString("user")
+		token, _ := cmd.Flags().GetString("token")
+		showVersion, _ := cmd.Flags().GetBool("version")
+
+		if datasetUtils.TestFlags != nil {
+			datasetUtils.TestFlags(map[string]interface{}{
+				"user":              userpass,
+				"token":             token,
+				"testenv":           testenvFlag,
+				"devenv":            devenvFlag,
+				"nonInteractive":    nonInteractiveFlag,
+				"removeFromCatalog": removeFromCatalogFlag,
+				"version":           showVersion,
+			})
+			return
+		}
+
+		// execute command
+		if showVersion {
+			fmt.Printf("%s\n", VERSION)
+			return
+		}
+
+		// check for program version only if running interactively
+		datasetUtils.CheckForNewVersion(client, CMD, VERSION)
+		datasetUtils.CheckForServiceAvailability(client, testenvFlag, true)
+
+		if testenvFlag {
+			APIServer = TEST_API_SERVER
+			env = "test"
+		} else if devenvFlag {
+			APIServer = DEV_API_SERVER
+			env = "dev"
+		} else {
+			APIServer = PROD_API_SERVER
+			env = "production"
+		}
+
+		color.Set(color.FgRed)
+		log.Printf("You are about to remove a dataset from the === %s === data catalog environment...", env)
+		color.Unset()
+
+		pid := ""
+
+		if len(args) == 1 {
+			pid = args[0]
+		} else {
+			log.Println("invalid number of args")
+			return
+		}
+
+		auth := &datasetUtils.RealAuthenticator{}
+		user, _ := datasetUtils.Authenticate(auth, client, APIServer, &token, &userpass)
+
+		if user["username"] != "archiveManager" {
+			log.Fatalf("You must be archiveManager to be allowed to delete datasets\n")
+		}
+
+		datasetUtils.RemoveFromArchive(client, APIServer, pid, user, nonInteractiveFlag)
+
+		if removeFromCatalogFlag {
+			datasetUtils.RemoveFromCatalog(client, APIServer, pid, user, nonInteractiveFlag)
+		} else {
+			log.Println("To also delete the dataset from the catalog, add the flag --removeFromCatalog")
+		}
+	},
+}
+
+func init() {
+	rootCmd.AddCommand(datasetCleanerCmd)
+
+	datasetCleanerCmd.Flags().Bool("removeFromCatalog", false, "Defines if the dataset should also be deleted from the data catalog")
+	datasetCleanerCmd.Flags().Bool("nonInteractive", false, "Defines if no questions will be asked, just do it - make sure you know what you are doing")
+	datasetCleanerCmd.Flags().Bool("testenv", false, "Use test environment (qa) instead of production environment")
+	datasetCleanerCmd.Flags().Bool("devenv", false, "Use development environment instead of production environment (developers only)")
+	datasetCleanerCmd.Flags().String("user", "", "Defines optional username:password string")
+	datasetCleanerCmd.Flags().String("token", "", "Defines optional API token instead of username:password")
+	datasetCleanerCmd.Flags().Bool("version", false, "Show version number and exit")
+
+	datasetCleanerCmd.MarkFlagsMutuallyExclusive("testenv", "devenv")
+}
diff --git a/cmd/commands/datasetGetProposal.go b/cmd/commands/datasetGetProposal.go
new file mode 100644
index 0000000..3ba67ff
--- /dev/null
+++ b/cmd/commands/datasetGetProposal.go
@@ -0,0 +1,124 @@
+package cmd
+
+import (
+ "crypto/tls" + "encoding/json" + "fmt" + "log" + "net/http" + "os" + "time" + + "github.com/fatih/color" + "github.com/paulscherrerinstitute/scicat/datasetUtils" + "github.com/spf13/cobra" +) + +var datasetGetProposalCmd = &cobra.Command{ + Use: "datasetGetProposal [options] ownerGroup", + Short: "Returns the proposal information for a given ownerGroup", + Long: `Tool to retrieve proposal information for a given ownerGroup. + +For further help see "` + MANUAL + `"`, + Args: cobra.ExactArgs(1), + Run: func(cmd *cobra.Command, args []string) { + // vars and constants + var client = &http.Client{ + Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: false}}, + Timeout: 10 * time.Second} + + const APP = "datasetGetProposal" + + var APIServer string + var env string + + // pass parameters + userpass, _ := cmd.Flags().GetString("user") + token, _ := cmd.Flags().GetString("token") + fieldname, _ := cmd.Flags().GetString("field") + testenvFlag, _ := cmd.Flags().GetBool("testenv") + devenvFlag, _ := cmd.Flags().GetBool("devenv") + showVersion, _ := cmd.Flags().GetBool("version") + + if datasetUtils.TestFlags != nil { + datasetUtils.TestFlags(map[string]interface{}{ + "user": userpass, + "token": token, + "field": fieldname, + "testenv": testenvFlag, + "devenv": devenvFlag, + "version": showVersion, + }) + return + } + + // execute command + if showVersion { + fmt.Printf("%s\n", VERSION) + return + } + + // check for program version only if running interactively + datasetUtils.CheckForNewVersion(client, APP, VERSION) + + if testenvFlag { + APIServer = TEST_API_SERVER + env = "test" + } else if devenvFlag { + APIServer = DEV_API_SERVER + env = "dev" + } else { + APIServer = PROD_API_SERVER + env = "production" + } + + color.Set(color.FgGreen) + log.Printf("You are about to retrieve the proposal information from the === %s === data catalog environment...", env) + color.Unset() + + ownerGroup := "" + + //TODO cleanup text formatting: + if len(args) == 1 { + ownerGroup = args[0] + } else { + log.Println("invalid number of args") + return + } + + auth := &datasetUtils.RealAuthenticator{} + user, accessGroups := datasetUtils.Authenticate(auth, client, APIServer, &token, &userpass) + proposal, err := datasetUtils.GetProposal(client, APIServer, ownerGroup, user, accessGroups) + if err != nil { + log.Fatal(err) + } + + // proposal is of type map[string]interface{} + + if len(proposal) > 0 { + if fieldname != "" { + fmt.Println(proposal[fieldname]) + } else { + pretty, _ := json.MarshalIndent(proposal, "", " ") + fmt.Printf("%s\n", pretty) + } + os.Exit(0) + } else { + log.Printf("No Proposal information found for group %v\n", ownerGroup) + os.Exit(1) + } + }, +} + +func init() { + rootCmd.AddCommand(datasetGetProposalCmd) + + datasetGetProposalCmd.Flags().String("user", "", "Defines optional username and password") + datasetGetProposalCmd.Flags().String("token", "", "Defines optional API token instead of username:password") + datasetGetProposalCmd.Flags().String("field", "", "Defines optional field name , whose value should be returned instead of full information") + datasetGetProposalCmd.Flags().Bool("testenv", false, "Use test environment (qa) instead or production") + datasetGetProposalCmd.Flags().Bool("devenv", false, "Use development environment instead or production") + datasetGetProposalCmd.Flags().Bool("version", false, "Show version number and exit") + + datasetGetProposalCmd.MarkFlagsMutuallyExclusive("testenv", "devenv") +} diff --git a/cmd/commands/datasetIngestor.go 
b/cmd/commands/datasetIngestor.go new file mode 100644 index 0000000..a458221 --- /dev/null +++ b/cmd/commands/datasetIngestor.go @@ -0,0 +1,402 @@ +package cmd + +import ( + "bufio" + "crypto/tls" + "encoding/json" + "fmt" + "log" + "net/http" + "os" + "path/filepath" + "strings" + "time" + + "github.com/fatih/color" + "github.com/paulscherrerinstitute/scicat/datasetIngestor" + "github.com/paulscherrerinstitute/scicat/datasetUtils" + "github.com/spf13/cobra" +) + +var datasetIngestorCmd = &cobra.Command{ + Use: "datasetIngestor", + Short: "Define and add a dataset to the SciCat datacatalog", + Long: `Purpose: define and add a dataset to the SciCat datacatalog + +This command must be run on the machine having access to the data +which comprises the dataset. It takes one or two input +files and creates the necessary messages which trigger +the creation of the corresponding datacatalog entries + +For further help see "` + MANUAL + `" + +Special hints for the decentral use case, where data is copied first to intermediate storage: +For Linux you need to have a valid Kerberos tickets, which you can get via the kinit command. +For Windows you need instead to specify -user username:password on the command line.`, + Args: cobra.RangeArgs(1, 2), + Run: func(cmd *cobra.Command, args []string) { + var tooLargeDatasets = 0 + var emptyDatasets = 0 + + var originalMap = make(map[string]string) + + var client = &http.Client{ + Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: false}}, + Timeout: 120 * time.Second} + + // const PROD_RSYNC_ARCHIVE_SERVER string = "ebarema2in.psi.ch" + // const TEST_RSYNC_ARCHIVE_SERVER string = "ebaremat1in.psi.ch" + // const DEV_RSYNC_ARCHIVE_SERVER string = "arematest2in.psi.ch" + + const CMD = "datasetIngestor" + + const TOTAL_MAXFILES = 400000 + + var scanner = bufio.NewScanner(os.Stdin) + + var APIServer string + var RSYNCServer string + var env string + + // pass parameters + ingestFlag, _ := cmd.Flags().GetBool("ingest") + testenvFlag, _ := cmd.Flags().GetBool("testenv") + devenvFlag, _ := cmd.Flags().GetBool("devenv") + localenvFlag, _ := cmd.Flags().GetBool("localenv") + tunnelenvFlag, _ := cmd.Flags().GetBool("tunnelenv") + noninteractiveFlag, _ := cmd.Flags().GetBool("noninteractive") + userpass, _ := cmd.Flags().GetString("user") + token, _ := cmd.Flags().GetString("token") + copyFlag, _ := cmd.Flags().GetBool("copy") + nocopyFlag, _ := cmd.Flags().GetBool("nocopy") + tapecopies, _ := cmd.Flags().GetInt("tapecopies") + autoarchiveFlag, _ := cmd.Flags().GetBool("autoarchive") + linkfiles, _ := cmd.Flags().GetString("linkfiles") + allowExistingSourceFolder, _ := cmd.Flags().GetBool("allowexistingsource") + addAttachment, _ := cmd.Flags().GetString("addattachment") + addCaption, _ := cmd.Flags().GetString("addcaption") + showVersion, _ := cmd.Flags().GetBool("version") + + if datasetUtils.TestFlags != nil { + datasetUtils.TestFlags(map[string]interface{}{ + "ingest": ingestFlag, + "testenv": testenvFlag, + "devenv": devenvFlag, + "localenv": localenvFlag, + "tunnelenv": tunnelenvFlag, + "noninteractive": noninteractiveFlag, + "user": userpass, + "token": token, + "copy": copyFlag, + "nocopy": nocopyFlag, + "tapecopies": tapecopies, + "autoarchive": autoarchiveFlag, + "linkfiles": linkfiles, + "allowexistingsource": allowExistingSourceFolder, + "addattachment": addAttachment, + "addcaption": addCaption, + "version": showVersion, + }) + return + } + + // functions use this flag in a way where "nil -> unset" + var 
allowExistingSourceFolderPtr *bool = &allowExistingSourceFolder
+		if !noninteractiveFlag && !cmd.Flags().Lookup("allowexistingsource").Changed {
+			allowExistingSourceFolderPtr = nil
+		}
+
+		if showVersion {
+			fmt.Printf("%s\n", VERSION)
+			return
+		}
+
+		// check for program version
+		datasetUtils.CheckForNewVersion(client, CMD, VERSION)
+		datasetUtils.CheckForServiceAvailability(client, testenvFlag, autoarchiveFlag)
+
+		if testenvFlag {
+			APIServer = TEST_API_SERVER
+			RSYNCServer = TEST_RSYNC_ARCHIVE_SERVER
+			env = "test"
+		} else if devenvFlag {
+			APIServer = DEV_API_SERVER
+			RSYNCServer = DEV_RSYNC_ARCHIVE_SERVER
+			env = "dev"
+		} else if localenvFlag {
+			APIServer = LOCAL_API_SERVER
+			RSYNCServer = LOCAL_RSYNC_ARCHIVE_SERVER
+			env = "local"
+		} else if tunnelenvFlag {
+			APIServer = TUNNEL_API_SERVER
+			RSYNCServer = TUNNEL_RSYNC_ARCHIVE_SERVER
+			env = "dev"
+		} else {
+			APIServer = PROD_API_SERVER
+			RSYNCServer = PROD_RSYNC_ARCHIVE_SERVER
+			env = "production"
+		}
+
+		color.Set(color.FgGreen)
+		log.Printf("You are about to add a dataset to the === %s === data catalog environment...", env)
+		color.Unset()
+
+		metadatafile := ""
+		filelistingPath := ""
+		folderlistingPath := ""
+		absFileListing := ""
+
+		if len(args) == 1 {
+			metadatafile = args[0]
+		} else if len(args) == 2 {
+			metadatafile = args[0]
+			if args[1] == "folderlisting.txt" {
+				folderlistingPath = args[1]
+			} else {
+				filelistingPath = args[1]
+				absFileListing, _ = filepath.Abs(filelistingPath)
+			}
+		} else {
+			log.Println("invalid number of args")
+			return
+		}
+
+		// TODO: change pointer parameter types to regular forms, as they don't need to be pointers
+		auth := &datasetUtils.RealAuthenticator{}
+		user, accessGroups := datasetUtils.Authenticate(auth, client, APIServer, &token, &userpass)
+
+		/* TODO Add info about policy settings and whether autoarchive will take place or not */
+
+		metaDataMap, sourceFolder, beamlineAccount, err := datasetIngestor.CheckMetadata(client, APIServer, metadatafile, user, accessGroups)
+		if err != nil {
+			log.Fatal("Error in CheckMetadata function: ", err)
+		}
+		//log.Printf("metadata object: %v\n", metaDataMap)
+
+		// assemble list of folders (=datasets) to be created
+		var folders []string
+		if folderlistingPath == "" {
+			folders = append(folders, sourceFolder)
+		} else {
+			// get folders from file
+			folderlist, err := os.ReadFile(folderlistingPath)
+			if err != nil {
+				log.Fatal(err)
+			}
+			lines := strings.Split(string(folderlist), "\n")
+			// remove all empty and comment lines
+			for _, sourceFolder := range lines {
+				if sourceFolder != "" && string(sourceFolder[0]) != "#" {
+					// convert into canonical form, only for certain online data linked from eaccounts home directories
+					var parts = strings.Split(sourceFolder, "/")
+					if len(parts) > 3 && parts[3] == "data" {
+						realSourceFolder, err := filepath.EvalSymlinks(sourceFolder)
+						if err != nil {
+							log.Fatalf("Failed to find canonical form of sourceFolder: %v %v", sourceFolder, err)
+						}
+						color.Set(color.FgYellow)
+						log.Printf("Transform sourceFolder %v to canonical form: %v", sourceFolder, realSourceFolder)
+						color.Unset()
+						folders = append(folders, realSourceFolder)
+					} else {
+						folders = append(folders, sourceFolder)
+					}
+				}
+			}
+		}
+		// log.Printf("Selected folders: %v\n", folders)
+
+		// test if a sourceFolder was already used in the past and give a warning
+		datasetIngestor.TestForExistingSourceFolder(folders, client, APIServer, user["accessToken"], allowExistingSourceFolderPtr)
+
+		// TODO: ask the archive system if the sourceFolder is already known to it.
If yes, no copy is needed; otherwise
+		// a destination location is defined by the archive system.
+		// For now, let the user decide whether a copy is needed.
+
+		// now everything is prepared, start to loop over all folders
+		var skip = ""
+		// check if the skip flag is globally defined via flags:
+		if cmd.Flags().Lookup("linkfiles").Changed {
+			switch linkfiles {
+			case "delete":
+				skip = "sA"
+			case "keep":
+				skip = "kA"
+			default:
+				skip = "dA" // default behaviour = keep internal for all
+			}
+		}
+
+		var datasetList []string
+		for _, sourceFolder := range folders {
+			// ignore empty lines
+			if sourceFolder == "" {
+				continue
+			}
+			metaDataMap["sourceFolder"] = sourceFolder
+			log.Printf("Scanning files in dataset %s", sourceFolder)
+
+			fullFileArray, startTime, endTime, owner, numFiles, totalSize :=
+				datasetIngestor.AssembleFilelisting(sourceFolder, filelistingPath, &skip)
+			//log.Printf("full fileListing: %v\n Start and end time: %s %s\n ", fullFileArray, startTime, endTime)
+			log.Printf("The dataset contains %v files with a total size of %v bytes.", numFiles, totalSize)
+
+			if totalSize == 0 {
+				emptyDatasets++
+				color.Set(color.FgRed)
+				log.Println("This dataset contains no files and will therefore NOT be stored.")
+				color.Unset()
+			} else if numFiles > TOTAL_MAXFILES {
+				tooLargeDatasets++
+				color.Set(color.FgRed)
+				log.Printf("This dataset exceeds the current filecount limit of the archive system of %v files and will therefore NOT be stored.\n", TOTAL_MAXFILES)
+				color.Unset()
+			} else {
+				// TODO: change the tapecopies param type of UpdateMetaData from pointer to regular int
+				// (it's not changed within the function)
+				datasetIngestor.UpdateMetaData(client, APIServer, user, originalMap, metaDataMap, startTime, endTime, owner, &tapecopies)
+				pretty, _ := json.MarshalIndent(metaDataMap, "", "    ")
+
+				log.Printf("Updated metadata object:\n%s\n", pretty)
+
+				// check if data is accessible at the archive server, unless it is a beamline account
+				// (assumed to always be centrally available) and unless the copy flag is defined via the command line
+				if !copyFlag && !nocopyFlag {
+					if !beamlineAccount {
+						err := datasetIngestor.CheckDataCentrallyAvailable(user["username"], RSYNCServer, sourceFolder)
+						if err != nil {
+							color.Set(color.FgYellow)
+							log.Printf("The source folder %v is not centrally available (decentral use case).\nThe data must first be copied to a rsync cache server.\n", sourceFolder)
+							color.Unset()
+							copyFlag = true
+							// check if user account
+							if len(accessGroups) == 0 {
+								color.Set(color.FgRed)
+								log.Println("For the decentral case you must use a personal account. Beamline accounts are not supported.")
+								color.Unset()
+								os.Exit(1)
+							}
+							if !noninteractiveFlag {
+								log.Printf("Do you want to continue (Y/n)? ")
+								scanner.Scan()
+								continueFlag := scanner.Text()
+								if continueFlag == "n" {
+									log.Fatalln("Further ingests interrupted because decentral case detected, but no copy wanted.")
+								}
+							}
+						}
+					} else {
+						copyFlag = false
+					}
+				} else {
+					if !copyFlag {
+						copyFlag = !nocopyFlag
+					}
+				}
+				if ingestFlag {
+					// create ingest entry. For the decentral case, delay setting the status to archivable until the data has been copied.
+					archivable := false
+					if _, ok := metaDataMap["datasetlifecycle"]; !ok {
+						metaDataMap["datasetlifecycle"] = map[string]interface{}{}
+					}
+					if copyFlag {
+						// do not override existing fields
+						metaDataMap["datasetlifecycle"].(map[string]interface{})["isOnCentralDisk"] = false
+						metaDataMap["datasetlifecycle"].(map[string]interface{})["archiveStatusMessage"] = "filesNotYetAvailable"
+						metaDataMap["datasetlifecycle"].(map[string]interface{})["archivable"] = archivable
+					} else {
+						archivable = true
+						metaDataMap["datasetlifecycle"].(map[string]interface{})["isOnCentralDisk"] = true
+						metaDataMap["datasetlifecycle"].(map[string]interface{})["archiveStatusMessage"] = "datasetCreated"
+						metaDataMap["datasetlifecycle"].(map[string]interface{})["archivable"] = archivable
+					}
+					datasetId := datasetIngestor.SendIngestCommand(client, APIServer, metaDataMap, fullFileArray, user)
+					// add attachment optionally
+					if addAttachment != "" {
+						datasetIngestor.AddAttachment(client, APIServer, datasetId, metaDataMap, user["accessToken"], addAttachment, addCaption)
+					}
+					if copyFlag {
+						err := datasetIngestor.SyncDataToFileserver(datasetId, user, RSYNCServer, sourceFolder, absFileListing)
+						if err == nil {
+							// delayed enabling
+							archivable = true
+							datasetIngestor.SendFilesReadyCommand(client, APIServer, datasetId, user)
+						} else {
+							color.Set(color.FgRed)
+							log.Printf("The command to copy files exited with error %v\n", err)
+							log.Printf("The dataset %v is not yet in an archivable state\n", datasetId)
+							// TODO let user decide to delete dataset entry
+							// datasetIngestor.DeleteDatasetEntry(client, APIServer, datasetId, user["accessToken"])
+							color.Unset()
+						}
+					}
+
+					if archivable {
+						datasetList = append(datasetList, datasetId)
+					}
+				}
+				datasetIngestor.ResetUpdatedMetaData(originalMap, metaDataMap)
+			}
+		}
+
+		if !ingestFlag {
+			color.Set(color.FgRed)
+			log.Printf("Note: you run in 'dry' mode to simply check data consistency. Use the --ingest flag to really ingest datasets.")
+		}
+
+		if emptyDatasets > 0 {
+			color.Set(color.FgRed)
+			log.Printf("Number of datasets not stored because they are empty: %v\nPlease note that this will cancel any subsequent archive steps from this job!\n", emptyDatasets)
+		}
+		if tooLargeDatasets > 0 {
+			color.Set(color.FgRed)
+			log.Printf("Number of datasets not stored because of too many files: %v\nPlease note that this will cancel any subsequent archive steps from this job!\n", tooLargeDatasets)
+		}
+		color.Unset()
+		datasetIngestor.PrintFileInfos()
+
+		// stop here if empty or too large datasets appeared
+		if emptyDatasets > 0 || tooLargeDatasets > 0 {
+			os.Exit(1)
+		}
+		// start archive job
+		if autoarchiveFlag && ingestFlag {
+			log.Printf("Submitting Archive Job for the ingested datasets.\n")
+			// TODO: change the param type from pointer to regular, as it is unnecessary
+			// for it to be passed as a pointer
+			datasetUtils.CreateJob(client, APIServer, user, datasetList, &tapecopies)
+		}
+
+		// print out results to STDOUT, one line per dataset
+		for i := 0; i < len(datasetList); i++ {
+			fmt.Println(datasetList[i])
+		}
+	},
+}
+
+func init() {
+	rootCmd.AddCommand(datasetIngestorCmd)
+
+	datasetIngestorCmd.Flags().Bool("ingest", false, "Defines if this command is meant to actually ingest data")
+	datasetIngestorCmd.Flags().Bool("testenv", false, "Use test environment (qa) instead of production environment")
+	datasetIngestorCmd.Flags().Bool("devenv", false, "Use development environment instead of production environment (developers only)")
+	datasetIngestorCmd.Flags().Bool("localenv", false, "Use local environment instead of production environment (developers only)")
+	datasetIngestorCmd.Flags().Bool("tunnelenv", false, "Use tunneled API server at port 5443 to access development instance (developers only)")
+	datasetIngestorCmd.Flags().Bool("noninteractive", false, "If set no questions will be asked and the default settings for all undefined flags will be assumed")
+	datasetIngestorCmd.Flags().String("user", "", "Defines optional username:password string. This can be used both for access to the data catalog API and for access to the intermediate storage server for the decentral use case")
+	datasetIngestorCmd.Flags().String("token", "", "Defines API token for access to the data catalog API. It is now mandatory for normal user accounts, but optional for functional accounts. It takes precedence over username/pw.")
+	datasetIngestorCmd.Flags().Bool("copy", false, "Defines if files should be copied from your local system to a central server before ingest (i.e. your data is not centrally available and therefore needs to be copied ='decentral' case). copyFlag has higher priority than nocopyFlag. If neither flag is defined the tool will try to make the best guess.")
+	datasetIngestorCmd.Flags().Bool("nocopy", false, "Defines if files should *not* be copied from your local system to a central server before ingest (i.e.
your data is centrally available and therefore does not need to be copied ='central' case).") + datasetIngestorCmd.Flags().Int("tapecopies", 0, "Number of tapecopies to be used for archiving") + datasetIngestorCmd.Flags().Bool("autoarchive", false, "Option to create archive job automatically after ingestion") + datasetIngestorCmd.Flags().String("linkfiles", "keepInternalOnly", "Define what to do with symbolic links: (keep|delete|keepInternalOnly)") + datasetIngestorCmd.Flags().Bool("allowexistingsource", false, "Defines if existing sourceFolders can be reused") + datasetIngestorCmd.Flags().String("addattachment", "", "Filename of image to attach (single dataset case only)") + datasetIngestorCmd.Flags().String("addcaption", "", "Optional caption to be stored with attachment (single dataset case only)") + datasetIngestorCmd.Flags().Bool("version", false, "Show version number and exit") + + datasetIngestorCmd.MarkFlagsMutuallyExclusive("testenv", "devenv", "localenv", "tunnelenv") +} diff --git a/cmd/commands/datasetPublishData.go b/cmd/commands/datasetPublishData.go new file mode 100644 index 0000000..37cd2d5 --- /dev/null +++ b/cmd/commands/datasetPublishData.go @@ -0,0 +1,347 @@ +package cmd + +import ( + "bytes" + "crypto/tls" + "encoding/json" + "flag" + "fmt" + "io" + "log" + "net/http" + "os" + "os/exec" + "strings" + "text/template" + "time" + "unicode/utf8" + + "github.com/fatih/color" + "github.com/paulscherrerinstitute/scicat/datasetUtils" + "github.com/spf13/cobra" +) + +var datasetPublishDataCmd = &cobra.Command{ + Use: "datasetPublishData [options]", + Short: "Copy all files from a publisheddata entry (list of datasets) to publication server", + Long: `Tool to publish datasets from the intermediate cache server of the tape archive +to the publication server. Copies the files, creates and installs a download page +and updates the downloadLink value for the specified PublishedData document. + +This script must be run on the retrieve servers (from root) and pushes data to the publication server +hosted in the DMZ. It requires that a previous retrieve job for the datasets, executed +by the user "archiveManager", is finished, such that data are available in the retrieve +location. + +The resulting files from dataset folders will be stored under the full original sourcePath +on the publication server. 
+ +Usage example: +./datasetPublishData -user archiveManager:password -publisheddata 10.16907/05a50450-767f-421d-9832-342b57c201 + +To update the PublishedData entry with the downloadLink you have to run the script as user archiveManager.`, + Args: cobra.NoArgs, + Run: func(cmd *cobra.Command, args []string) { + // const PROD_RSYNC_RETRIEVE_SERVER string = "ebarema4in.psi.ch" + // const TEST_RSYNC_RETRIEVE_SERVER string = "ebaremat1in.psi.ch" + // const DEV_RSYNC_RETRIEVE_SERVER string = "arematest2in.psi.ch" + // ===== local consts ===== + + // ===== variables ===== + var APIServer string = PROD_API_SERVER + + var client = &http.Client{ + Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: false}}, + Timeout: 10 * time.Second} + + type PageData struct { + Doi string + PageTitle string + BrowseUrls []string + SizeArray []int + NumFilesArray []int + } + + // ===== functions ===== + assembleRsyncCommands := func(datasetDetails []datasetUtils.Dataset) []string { + batchCommands := make([]string, 0) + for _, dataset := range datasetDetails { + shortDatasetId := strings.Split(dataset.Pid, "/")[1] + fullDest := "/datasets" + dataset.SourceFolder + command := "ssh " + PUBLISHServer + " mkdir -p " + fullDest + ";" + + "ssh " + PUBLISHServer + " chown -R egli " + fullDest + ";" + + "ssh " + PUBLISHServer + " chmod -R 755 " + fullDest + ";" + + "/usr/bin/rsync -av -e ssh " + RETRIEVELocation + shortDatasetId + "/ " + PUBLISHServer + ":" + fullDest + batchCommands = append(batchCommands, command) + } + return batchCommands + } + + executeCommands := func(batchCommands []string) { + log.Printf("\n\n\n====== Starting transfer of dataset files: \n\n") + for _, batchCommand := range batchCommands { + cmd := exec.Command("/bin/sh", "-c", batchCommand) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + //log.Printf("Running %v.\n", cmd.Args) + log.Printf("\n=== Transfer command: %s.\n", batchCommand) + + err := cmd.Run() + + if err != nil { + log.Fatal(err) + } + } + } + + createWebpage := func(urls []string, title string, doi string, datasetDetails []datasetUtils.Dataset, + publishedDataId string, userpass *string, token *string) { + // A Function that returns the longest common prefix path (runes) + // from the array of strings + commonPrefix := func(arr []string) string { + // return shortest string, length given in bytes + findMinLength := func(arr []string) int { + n := len(arr) + min := len(arr[0]) + + for i := 1; i < n; i++ { + if len(arr[i]) < min { + min = len(arr[i]) + } + } + + return min + } + + n := len(arr) + if n == 1 { + return arr[0] + } + + minlenBytes := findMinLength(arr) + + result := "" // Our resultant string + + // loop over runes (UTF8) + + for i, w := 0, 0; i < minlenBytes; i += w { + currentRune, width := utf8.DecodeRuneInString(arr[0][i:]) + // fmt.Printf("%#U starts at byte position %d\n", currentRune, i) + w = width + // loop through other strings + for j := 1; j < n; j++ { + nextRune, _ := utf8.DecodeRuneInString(arr[j][i:]) + if nextRune != currentRune { + // strip off characters after last "/" + parts := strings.Split(result, "/") + result = strings.Join(parts[:len(parts)-1], "/") + "/" + return result + } + } + result = result + string(currentRune) + } + // strip off characters after last "/" + parts := strings.Split(result, "/") + result = strings.Join(parts[:len(parts)-1], "/") + "/" + return result + } + + stringInSlice := func(a string, list []string) bool { + for _, b := range list { + if b == a { + return true + } + } + return false + } + + 
// log.Printf("Datasetdetails %v", datasetDetails) + tmpl := template.Must(template.ParseFiles("downloadPage.html")) + sizeArray := make([]int, 0) + numFilesArray := make([]int, 0) + for _, datasetDetail := range datasetDetails { + sizeArray = append(sizeArray, datasetDetail.Size) + numFilesArray = append(numFilesArray, datasetDetail.NumberOfFiles) + } + data := PageData{ + Doi: doi, + PageTitle: title, + BrowseUrls: urls, + SizeArray: sizeArray, + NumFilesArray: numFilesArray, + } + + // log.Printf("Pagedata %v", data) + f, err := os.Create("output.html") + if err != nil { + log.Fatal(err) + } + defer f.Close() + tmpl.Execute(f, data) + + // determine location of downloadLink from common part of all sourceFolders + downloadLink := commonPrefix(urls) + fmt.Printf("downloadLink:%v\n", downloadLink) + // move up one level in case that one dataset sourcefolder is equal to downloadLocation + // to avoidto "hide" the sourcefolder loaction by the index.html + if stringInSlice(downloadLink, urls) { + slice := strings.Split(downloadLink, "/") + if len(slice) > 0 { + slice = slice[:len(slice)-1] + } + downloadLink = strings.Join(slice, "/") + } + fmt.Printf("downloadLink2 :%v\n", downloadLink) + + // copy output.html to downloadLink location (remove https://server part) as index.html + startPos := strings.Index(downloadLink, "/datasets") + command := "/usr/bin/rsync -av -e ssh output.html " + PUBLISHServer + ":" + downloadLink[startPos:] + "/index.html" + cmd := exec.Command("/bin/sh", "-c", command) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + log.Printf("\n=== Transfer download page command: %s .\n", command) + err2 := cmd.Run() + if err != nil { + log.Fatal(err2) + } + + // set value in publishedData ============================== + + auth := &datasetUtils.RealAuthenticator{} + user, _ := datasetUtils.Authenticate(auth, client, APIServer, token, userpass) + + type PublishedDataPart struct { + DownloadLink string `json:"downloadLink"` + } + updateData := PublishedDataPart{ + DownloadLink: downloadLink, + } + + cmm, _ := json.Marshal(updateData) + // metadataString := string(cmm) + + myurl := APIServer + "/PublishedData/" + strings.Replace(publishedDataId, "/", "%2F", 1) + "?access_token=" + user["accessToken"] + req, err := http.NewRequest("PATCH", myurl, bytes.NewBuffer(cmm)) + if err != nil { + log.Fatal(err) + } + req.Header.Set("Content-Type", "application/json") + // fmt.Printf("request to message broker:%v\n", req) + resp, err := client.Do(req) + if err != nil { + log.Fatal(err) + } + + defer resp.Body.Close() + if resp.StatusCode == 200 { + io.ReadAll(resp.Body) + log.Printf("Successfully set downloadLink to %v\n", downloadLink) + } else { + log.Fatalf("Failed to update downloadLink on publishedData %v %v\n", resp.StatusCode, publishedDataId) + } + } + + // ===== gather parameters===== + publishFlag, _ := cmd.Flags().GetBool("publish") + publishedDataId, _ := cmd.Flags().GetString("publisheddata") + // datasetId := cmd.Flags().GetString("dataset") + // ownerGroup := cmd.Flags().GetString("ownergroup") + userpass, _ := cmd.Flags().GetString("user") + token, _ := cmd.Flags().GetString("token") + testenvFlag, _ := cmd.Flags().GetBool("testenv") + devenvFlag, _ := cmd.Flags().GetBool("devenv") + showVersion, _ := cmd.Flags().GetBool("version") + + if datasetUtils.TestFlags != nil { + datasetUtils.TestFlags(map[string]interface{}{ + "publish": publishFlag, + "publisheddata": publishedDataId, + "testenv": testenvFlag, + "devenv": devenvFlag, + "user": userpass, + "token": token, + 
"version": showVersion, + }) + return + } + + // ===== execute command ===== + if showVersion { + fmt.Printf("%s\n", VERSION) + return + } + + var env string + if testenvFlag { + APIServer = TEST_API_SERVER + env = "test" + } else if devenvFlag { + APIServer = DEV_API_SERVER + env = "dev" + } else { + APIServer = PROD_API_SERVER + env = "production" + } + + color.Set(color.FgGreen) + log.Printf("You are about to publish dataset(s) from the === %s === retrieve server...", env) + color.Unset() + + if !publishFlag { + color.Set(color.FgRed) + log.Printf("Note: you run in 'dry' mode to simply check which data would be published.\n") + log.Printf("Use the -publish flag to actually publish the datasets.\n") + color.Unset() // Don't forget to unset + } + + if publishedDataId == "" { /* && *datasetId == "" && *ownerGroup == "" */ + fmt.Println("\n\nTool to publish datasets from the intermediate cache server of the tape archive") + fmt.Printf("to the publication server. Copies the files, creates and installs a download page\n") + fmt.Printf("and updates the downloadLink value for the specified PublishedData document\n\n") + fmt.Printf("Run script without arguments, but specify options:\n\n") + fmt.Printf("datasetPublishData [options] \n\n") + fmt.Printf("Use -publisheddata option to define the datasets which should be published.\n\n") + fmt.Printf("For example:\n") + fmt.Printf("./datasetPublishData -user archiveManager:password -publisheddata 10.16907/05a50450-767f-421d-9832-342b57c201\n\n") + fmt.Printf("To update the PublishedData entry with the downloadLink you have to run the script as user archiveManager\n\n") + flag.PrintDefaults() + return + } + + datasetList, title, doi := datasetUtils.GetDatasetsOfPublication(client, APIServer, publishedDataId) + + // get sourceFolder and other dataset related info for all Datasets + datasetDetails, urls := datasetUtils.GetDatasetDetailsPublished(client, APIServer, datasetList) + + // assemble rsync commands to be submitted + batchCommands := assembleRsyncCommands(datasetDetails) + + if !publishFlag { + color.Set(color.FgRed) + log.Printf("\n\nNote: you run in 'dry' mode to simply check what would happen.") + log.Printf("Use the -publish flag to actually copy data to publication server.") + log.Printf("The following commands will be executed") + log.Printf("%v\n", strings.Join(batchCommands[:], "\n\n")) + color.Unset() + } else { + executeCommands(batchCommands) + createWebpage(urls, title, doi, datasetDetails, publishedDataId, &userpass, &token) + } + }, +} + +func init() { + rootCmd.AddCommand(datasetPublishDataCmd) + + datasetPublishDataCmd.Flags().Bool("publish", false, "Defines if this command is meant to actually publish data (default nothing is done)") + datasetPublishDataCmd.Flags().String("publisheddata", "", "Defines to publish data froma given publishedData document ID") + // datasetPublishDataCmd.Flags().String("dataset", "", "Defines single datasetId to publish") + // datasetPublishDataCmd.Flags().String("ownergroup", "", "Defines to publish only datasets of the specified ownerGroup") + datasetPublishDataCmd.Flags().String("user", "", "Defines optional username:password string") + datasetPublishDataCmd.Flags().String("token", "", "Defines optional API token instead of username:password") + datasetPublishDataCmd.Flags().Bool("testenv", false, "Use test environment (qa) (default is to use production system)") + datasetPublishDataCmd.Flags().Bool("devenv", false, "Use development environment (default is to use production system)") + 
datasetPublishDataCmd.Flags().Bool("version", false, "Show version number and exit") + + datasetPublishDataCmd.MarkFlagsMutuallyExclusive("testenv", "devenv") +} diff --git a/cmd/commands/datasetPublishDataRetrieve.go b/cmd/commands/datasetPublishDataRetrieve.go new file mode 100644 index 0000000..b71665b --- /dev/null +++ b/cmd/commands/datasetPublishDataRetrieve.go @@ -0,0 +1,132 @@ +package cmd + +import ( + "crypto/tls" + "flag" + "fmt" + "log" + "net/http" + "time" + + "github.com/fatih/color" + "github.com/paulscherrerinstitute/scicat/datasetUtils" + "github.com/spf13/cobra" +) + +var datasetPublishDataRetrieveCmd = &cobra.Command{ + Use: "datasetPublishDataRetrieve [options]", + Short: "Create a job to retrieve all datasets of a given PublishedData item", + Long: `Create a job to retrieve all datasets of a given PublishedData item.`, + Args: cobra.NoArgs, + Run: func(cmd *cobra.Command, args []string) { + // consts & vars + const PROD_API_SERVER string = "https://dacat.psi.ch/api/v3" + const TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3" + const DEV_API_SERVER string = "https://dacat-development.psi.ch/api/v3" + + var APIServer string = PROD_API_SERVER + + var client = &http.Client{ + Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: false}}, + Timeout: 10 * time.Second} + + // retrieve params + retrieveFlag, _ := cmd.Flags().GetBool("retrieve") + publishedDataId, _ := cmd.Flags().GetString("publisheddata") + userpass, _ := cmd.Flags().GetString("user") + token, _ := cmd.Flags().GetString("token") + testenvFlag, _ := cmd.Flags().GetBool("testenv") + devenvFlag, _ := cmd.Flags().GetBool("devenv") + showVersion, _ := cmd.Flags().GetBool("version") + + if datasetUtils.TestFlags != nil { + datasetUtils.TestFlags(map[string]interface{}{ + "retrieve": retrieveFlag, + "publisheddata": publishedDataId, + "testenv": testenvFlag, + "devenv": devenvFlag, + "user": userpass, + "token": token, + "version": showVersion, + }) + return + } + + // execute command + if showVersion { + fmt.Printf("%s\n", VERSION) + return + } + + var env string + if testenvFlag { + APIServer = TEST_API_SERVER + env = "test" + } else if devenvFlag { + APIServer = DEV_API_SERVER + env = "dev" + } else { + APIServer = PROD_API_SERVER + env = "production" + } + + color.Set(color.FgGreen) + log.Printf("You are about to trigger a retrieve job for publish dataset(s) from the === %s === retrieve server...", env) + color.Unset() + + if !retrieveFlag { + color.Set(color.FgRed) + log.Printf("Note: you run in 'dry' mode to simply check which data would be retrieved.\n") + log.Printf("Use the -retrieve flag to actually retrieve the datasets.\n") + color.Unset() + } + + if publishedDataId == "" { /* && *datasetId == "" && *ownerGroup == "" */ + fmt.Println("\n\nTool to retrieve datasets to the intermediate cache server of the tape archive") + fmt.Printf("Run script without arguments, but specify options:\n\n") + fmt.Printf("datasetPublishDataRetrieve [options] \n\n") + fmt.Printf("Use -publisheddata option to define the datasets which should be published.\n\n") + fmt.Printf("For example:\n") + fmt.Printf("./datasetPublishDataRetrieve -user archiveManager:password -publisheddata 10.16907/05a50450-767f-421d-9832-342b57c201\n\n") + fmt.Printf("The script should be run as archiveManager\n\n") + flag.PrintDefaults() + return + } + + auth := &datasetUtils.RealAuthenticator{} + user, _ := datasetUtils.Authenticate(auth, client, APIServer, &token, &userpass) + + datasetList, _, _ := 
datasetUtils.GetDatasetsOfPublication(client, APIServer, publishedDataId)
+
+		// get sourceFolder and other dataset related info for all Datasets and print them
+		datasetUtils.GetDatasetDetailsPublished(client, APIServer, datasetList)
+
+		if !retrieveFlag {
+			color.Set(color.FgRed)
+			log.Printf("\n\nNote: you run in 'dry' mode to simply check what would happen.")
+			log.Printf("Use the -retrieve flag to actually retrieve data from tape.\n")
+			color.Unset()
+		} else {
+			// create retrieve Job
+			jobId, err := datasetUtils.CreateRetrieveJob(client, APIServer, user, datasetList)
+			if err != nil {
+				log.Fatal(err)
+			}
+			fmt.Println(jobId)
+		}
+	},
+}
+
+func init() {
+	rootCmd.AddCommand(datasetPublishDataRetrieveCmd)
+
+	datasetPublishDataRetrieveCmd.Flags().Bool("retrieve", false, "Defines if this command is meant to actually retrieve data (default: retrieve actions are only displayed)")
+	datasetPublishDataRetrieveCmd.Flags().String("publisheddata", "", "Defines the publishedData document ID whose datasets should be retrieved")
+	datasetPublishDataRetrieveCmd.Flags().String("user", "", "Defines optional username:password string")
+	datasetPublishDataRetrieveCmd.Flags().String("token", "", "Defines optional API token instead of username:password")
+	datasetPublishDataRetrieveCmd.Flags().Bool("testenv", false, "Use test environment (qa) (default is to use production system)")
+	datasetPublishDataRetrieveCmd.Flags().Bool("devenv", false, "Use development environment (default is to use production system)")
+	datasetPublishDataRetrieveCmd.Flags().Bool("version", false, "Show version number and exit")
+
+	datasetPublishDataRetrieveCmd.MarkFlagsMutuallyExclusive("testenv", "devenv")
+}
diff --git a/cmd/commands/datasetRetriever.go b/cmd/commands/datasetRetriever.go
new file mode 100644
index 0000000..b2e724c
--- /dev/null
+++ b/cmd/commands/datasetRetriever.go
@@ -0,0 +1,229 @@
+package cmd
+
+import (
+	"crypto/tls"
+	"fmt"
+	"log"
+	"net/http"
+	"os"
+	"os/exec"
+	"strings"
+	"time"
+
+	"github.com/fatih/color"
+	"github.com/paulscherrerinstitute/scicat/datasetUtils"
+	"github.com/spf13/cobra"
+)
+
+var datasetRetrieverCmd = &cobra.Command{
+	Use:   "datasetRetriever (options) local-destination-path",
+	Short: "Retrieve datasets from intermediate cache, taking into account original sourceFolder names",
+	Long: `Tool to retrieve datasets from the intermediate cache server of the tape archive to the
+destination path on your local system.
+
+This script must be run on the machine having write access to the destination folder.
+
+The resulting files from dataset folders will be stored in destinationPath/sourceFolders.
+
+In case there are several datasets with the same sourceFolder, they will simply be
+enumerated by appending a "_1", "_2" etc. (not yet implemented).
+
+By default all available datasets on the retrieve server will be fetched.
+Use the option -dataset or -ownergroup to restrict the datasets which should be fetched.
+
+For further help see "` + MANUAL + `"`,
+	Args: cobra.ExactArgs(1),
+	Run: func(cmd *cobra.Command, args []string) {
+		// consts & vars
+		const PROD_API_SERVER string = "https://dacat.psi.ch/api/v3"
+		const TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3"
+		const DEV_API_SERVER string = "https://dacat-development.psi.ch/api/v3"
+
+		const PROD_RSYNC_RETRIEVE_SERVER string = "pb-retrieve.psi.ch"
+		const TEST_RSYNC_RETRIEVE_SERVER string = "pbt-retrieve.psi.ch"
+		const DEV_RSYNC_RETRIEVE_SERVER string = "arematest2in.psi.ch"
+
+		// const PROD_RSYNC_RETRIEVE_SERVER string = "ebarema4in.psi.ch"
+		// const TEST_RSYNC_RETRIEVE_SERVER string = "ebaremat1in.psi.ch"
+		// const DEV_RSYNC_RETRIEVE_SERVER string = "arematest2in.psi.ch"
+
+		// TODO Windows
+		const APP = "datasetRetriever"
+
+		var APIServer string = PROD_API_SERVER
+		var RSYNCServer string = PROD_RSYNC_RETRIEVE_SERVER
+
+		var client = &http.Client{
+			Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: false}},
+			Timeout:   10 * time.Second}
+
+		// internal functions
+		assembleRsyncCommands := func(username string, datasetDetails []datasetUtils.Dataset, destinationPath string) ([]string, []string) {
+			batchCommands := make([]string, 0)
+			destinationFolders := make([]string, 0)
+			for _, dataset := range datasetDetails {
+				shortDatasetId := strings.Split(dataset.Pid, "/")[1]
+				fullDest := destinationPath + dataset.SourceFolder
+				command := "mkdir -p " + fullDest + ";" + "/usr/bin/rsync -av -e 'ssh -o StrictHostKeyChecking=no' " + username + "@" + RSYNCServer + ":retrieve/" + shortDatasetId + "/ " + fullDest
+				batchCommands = append(batchCommands, command)
+				destinationFolders = append(destinationFolders, fullDest)
+			}
+			return batchCommands, destinationFolders
+		}
+
+		executeCommands := func(batchCommands []string) {
+			log.Printf("\n\n\n====== Starting transfer of dataset files: \n\n")
+			for _, batchCommand := range batchCommands {
+				cmd := exec.Command("/bin/sh", "-c", batchCommand)
+				cmd.Stdout = os.Stdout
+				cmd.Stderr = os.Stderr
+				//log.Printf("Running %v.\n", cmd.Args)
+				log.Printf("\n=== Transfer command: %s.\n", batchCommand)
+
+				err := cmd.Run()
+
+				if err != nil {
+					log.Fatal(err)
+				}
+			}
+		}
+
+		checkSumVerification := func(destinationFolders []string) {
+			// sed '/is_directory$/d' __checksum_filename_*__ | awk -v FS=' ' '/^[^#]/{print $2,$1}' | sha1sum -c
+			log.Printf("\n\n\n====== Starting verification of checksums: \n\n")
+			for _, destination := range destinationFolders {
+				command := "cd " + destination + " ; sed '/is_directory$/d' __checksum_filename_*__ | awk -v FS=' ' '/^[^#]/{print $2,$1}' | sha1sum -c"
+				cmd := exec.Command("/bin/sh", "-c", command)
+				cmd.Stdout = os.Stdout
+				cmd.Stderr = os.Stderr
+				// log.Printf("Running %v.\n", cmd.Args)
+				log.Printf("\n=== Checking files within %s.\n", destination)
+				err := cmd.Run()
+
+				if err != nil {
+					log.Fatal(err)
+				}
+			}
+		}
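For orientation, the standalone sketch below mirrors the string assembly in `assembleRsyncCommands` above and prints the shell command it would emit for one dataset; the PID, username, and paths are made-up examples.

```go
// Minimal sketch mirroring the assembleRsyncCommands closure above, to show the
// exact shell command it emits for a single (hypothetical) dataset.
package main

import (
	"fmt"
	"strings"
)

func main() {
	const rsyncServer = "pb-retrieve.psi.ch" // PROD_RSYNC_RETRIEVE_SERVER
	pid := "20.500.11935/abc-123"            // hypothetical dataset PID
	sourceFolder := "/sls/csaxs/p17301"      // hypothetical sourceFolder
	destinationPath := "/data/out"           // the CLI's positional argument

	// PIDs have the form prefix/shortId; only the short id names the cache folder
	shortDatasetId := strings.Split(pid, "/")[1]
	fullDest := destinationPath + sourceFolder
	command := "mkdir -p " + fullDest + ";" +
		"/usr/bin/rsync -av -e 'ssh -o StrictHostKeyChecking=no' " +
		"user@" + rsyncServer + ":retrieve/" + shortDatasetId + "/ " + fullDest

	fmt.Println(command)
	// mkdir -p /data/out/sls/csaxs/p17301;/usr/bin/rsync -av ... user@pb-retrieve.psi.ch:retrieve/abc-123/ /data/out/sls/csaxs/p17301
}
```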
+
+		// retrieve flags
+		// TODO (from orig. code) extract jobId and checksum flags
+		retrieveFlag, _ := cmd.Flags().GetBool("retrieve")
+		userpass, _ := cmd.Flags().GetString("user")
+		token, _ := cmd.Flags().GetString("token")
+		nochksumFlag, _ := cmd.Flags().GetBool("nochksum")
+		datasetId, _ := cmd.Flags().GetString("dataset")
+		ownerGroup, _ := cmd.Flags().GetString("ownergroup")
+		testenvFlag, _ := cmd.Flags().GetBool("testenv")
+		devenvFlag, _ := cmd.Flags().GetBool("devenv")
+		showVersion, _ := cmd.Flags().GetBool("version")
+
+		if datasetUtils.TestFlags != nil {
+			datasetUtils.TestFlags(map[string]interface{}{
+				"retrieve":   retrieveFlag,
+				"testenv":    testenvFlag,
+				"devenv":     devenvFlag,
+				"user":       userpass,
+				"token":      token,
+				"nochksum":   nochksumFlag,
+				"dataset":    datasetId,
+				"ownergroup": ownerGroup,
+				"version":    showVersion,
+			})
+			return
+		}
+
+		// execute command
+		if showVersion {
+			fmt.Printf("%s\n", VERSION)
+			return
+		}
+
+		datasetUtils.CheckForNewVersion(client, APP, VERSION)
+
+		var env string
+		if testenvFlag {
+			APIServer = TEST_API_SERVER
+			RSYNCServer = TEST_RSYNC_RETRIEVE_SERVER
+			env = "test"
+		} else if devenvFlag {
+			APIServer = DEV_API_SERVER
+			RSYNCServer = DEV_RSYNC_RETRIEVE_SERVER
+			env = "dev"
+		} else {
+			APIServer = PROD_API_SERVER
+			RSYNCServer = PROD_RSYNC_RETRIEVE_SERVER
+			env = "production"
+		}
+
+		color.Set(color.FgGreen)
+		log.Printf("You are about to retrieve dataset(s) from the === %s === retrieve server...", env)
+		color.Unset()
+
+		if !retrieveFlag {
+			color.Set(color.FgRed)
+			log.Printf("Note: you run in 'dry' mode to simply check which data would be fetched.\n")
+			log.Printf("Use the --retrieve flag to actually transfer the datasets to your chosen destination path.\n")
+			color.Unset() // Don't forget to unset
+		}
+
+		// cobra.ExactArgs(1) guarantees exactly one positional argument
+		destinationPath := args[0]
+
+		auth := &datasetUtils.RealAuthenticator{}
+		user, _ := datasetUtils.Authenticate(auth, client, APIServer, &token, &userpass)
+
+		datasetList, err := datasetUtils.GetAvailableDatasets(user["username"], RSYNCServer, datasetId)
+		if err != nil {
+			log.Fatal(err)
+		}
+
+		if len(datasetList) == 0 {
+			fmt.Printf("\n\nNo datasets found on intermediate cache server.\n")
+			fmt.Println("Did you submit a retrieve job from the data catalog first?")
+		} else {
+			// get sourceFolder and other dataset related info for all Datasets
+			datasetDetails, err := datasetUtils.GetDatasetDetails(client, APIServer, user["accessToken"], datasetList, ownerGroup)
+			if err != nil {
+				log.Fatal(err)
+			}
+
+			// assemble rsync commands to be submitted
+			batchCommands, destinationFolders := assembleRsyncCommands(user["username"], datasetDetails, destinationPath)
+			// log.Printf("%v\n", batchCommands)
+
+			if !retrieveFlag {
+				color.Set(color.FgRed)
+				log.Printf("\n\nNote: you run in 'dry' mode to simply check what would happen.")
+				log.Printf("Use the --retrieve flag to actually retrieve datasets.")
+				color.Unset()
+			} else {
+				executeCommands(batchCommands)
+				if !nochksumFlag {
+					checkSumVerification(destinationFolders)
+				}
+			}
+		}
+	},
+}
+
+func init() {
+	rootCmd.AddCommand(datasetRetrieverCmd)
+
+	datasetRetrieverCmd.Flags().Bool("retrieve", false, "Defines if this command is meant to actually copy data to the local system (default: nothing is done)")
+	datasetRetrieverCmd.Flags().String("user", "", "Defines optional username and password (default is to prompt for username and password)")
+	datasetRetrieverCmd.Flags().String("token", "", "Defines optional API token instead of username:password")
+	datasetRetrieverCmd.Flags().Bool("nochksum", false, "Switch off checksum verification step (default: checksum tests are done)")
+	datasetRetrieverCmd.Flags().String("dataset", "", "Defines single dataset to retrieve (default: all available datasets)")
+	datasetRetrieverCmd.Flags().String("ownergroup", "", "Fetch only datasets of the specified ownerGroup (default is to fetch all available datasets)")
+	datasetRetrieverCmd.Flags().Bool("testenv", false, "Use test environment (qa) (default is to use production system)")
+	datasetRetrieverCmd.Flags().Bool("devenv", false, "Use development environment (default is to use production system)")
+	datasetRetrieverCmd.Flags().Bool("version", false, "Show version number and exit")
+
+	datasetRetrieverCmd.MarkFlagsMutuallyExclusive("testenv", "devenv")
+}
instead of username:password") + datasetRetrieverCmd.Flags().Bool("nochksum", false, "Switch off chksum verification step (default checksum tests are done)") + datasetRetrieverCmd.Flags().String("dataset", "", "Defines single dataset to retrieve (default all available datasets)") + datasetRetrieverCmd.Flags().String("ownergroup", "", "Defines to fetch only datasets of the specified ownerGroup (default is to fetch all available datasets)") + datasetRetrieverCmd.Flags().Bool("testenv", false, "Use test environment (qa) (default is to use production system)") + datasetRetrieverCmd.Flags().Bool("devenv", false, "Use development environment (default is to use production system)") + datasetRetrieverCmd.Flags().Bool("version", false, "Show version number and exit") + + datasetRetrieverCmd.MarkFlagsMutuallyExclusive("testenv", "devenv") +} diff --git a/cmd/commands/publicVars.go b/cmd/commands/publicVars.go new file mode 100644 index 0000000..5323167 --- /dev/null +++ b/cmd/commands/publicVars.go @@ -0,0 +1,3 @@ +package cmd + +var VERSION string diff --git a/cmd/commands/root.go b/cmd/commands/root.go new file mode 100644 index 0000000..aa9beb3 --- /dev/null +++ b/cmd/commands/root.go @@ -0,0 +1,28 @@ +package cmd + +import ( + "os" + + "github.com/spf13/cobra" +) + +var rootCmd = &cobra.Command{ + Use: "cmd", + Short: "CLI app for interacting with a SciCat instance", + Long: `This library comprises a few subcommands for managing SciCat +and datasets on it, as well as interacting with the archival system connected +to it.`, + // uncomment the next line if there's a default action + // Run: func(cmd *cobra.Command, args []string) { }, +} + +func Execute() { + err := rootCmd.Execute() + if err != nil { + os.Exit(1) + } +} + +func init() { + rootCmd.Flags().BoolP("toggle", "t", false, "Help message for toggle") +} diff --git a/cmd/commands/waitForJobFinished.go b/cmd/commands/waitForJobFinished.go new file mode 100644 index 0000000..6133460 --- /dev/null +++ b/cmd/commands/waitForJobFinished.go @@ -0,0 +1,181 @@ +package cmd + +import ( + "crypto/tls" + "encoding/json" + "flag" + "fmt" + "io" + "log" + "net/http" + "net/url" + "time" + + "github.com/fatih/color" + "github.com/paulscherrerinstitute/scicat/datasetUtils" + "github.com/spf13/cobra" +) + +var waitForJobFinishedCmd = &cobra.Command{ + Use: "waitForJobFinished (options)", + Short: "Waits for job to be finished", + Long: `This script polls the status of a given job and returns when Job is finished`, + Args: cobra.NoArgs, + Run: func(cmd *cobra.Command, args []string) { + // consts & vars + var client = &http.Client{ + Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: false}}, + Timeout: 10 * time.Second} + + const PROD_API_SERVER string = "https://dacat.psi.ch/api/v3" + const TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3" + const DEV_API_SERVER string = "https://dacat-development.psi.ch/api/v3" + + var APIServer string + var env string + + // structs + type Job struct { + Id string + JobStatusMessage string + } + + // funcs + handlePollResponse := func(resp *http.Response) (stopPolling bool, err error) { + if resp.StatusCode != 200 { + return true, fmt.Errorf("querying dataset details failed with status code %v", resp.StatusCode) + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return true, err + } + + var jobDetails []Job + err = json.Unmarshal(body, &jobDetails) + if err != nil { + return true, err + } + if len(jobDetails) == 0 { + return false, nil + } + if 
diff --git a/cmd/commands/waitForJobFinished.go b/cmd/commands/waitForJobFinished.go
new file mode 100644
index 0000000..6133460
--- /dev/null
+++ b/cmd/commands/waitForJobFinished.go
@@ -0,0 +1,181 @@
+package cmd
+
+import (
+	"crypto/tls"
+	"encoding/json"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"net/url"
+	"time"
+
+	"github.com/fatih/color"
+	"github.com/paulscherrerinstitute/scicat/datasetUtils"
+	"github.com/spf13/cobra"
+)
+
+var waitForJobFinishedCmd = &cobra.Command{
+	Use:   "waitForJobFinished (options)",
+	Short: "Waits for job to be finished",
+	Long:  `This script polls the status of a given job and returns when the job is finished.`,
+	Args:  cobra.NoArgs,
+	Run: func(cmd *cobra.Command, args []string) {
+		// consts & vars
+		var client = &http.Client{
+			Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: false}},
+			Timeout:   10 * time.Second}
+
+		const PROD_API_SERVER string = "https://dacat.psi.ch/api/v3"
+		const TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3"
+		const DEV_API_SERVER string = "https://dacat-development.psi.ch/api/v3"
+
+		var APIServer string
+		var env string
+
+		// structs
+		type Job struct {
+			Id               string
+			JobStatusMessage string
+		}
+
+		// funcs
+		handlePollResponse := func(resp *http.Response) (stopPolling bool, err error) {
+			if resp.StatusCode != 200 {
+				return true, fmt.Errorf("querying job details failed with status code %v", resp.StatusCode)
+			}
+			defer resp.Body.Close()
+
+			body, err := io.ReadAll(resp.Body)
+			if err != nil {
+				return true, err
+			}
+
+			var jobDetails []Job
+			err = json.Unmarshal(body, &jobDetails)
+			if err != nil {
+				return true, err
+			}
+			if len(jobDetails) == 0 {
+				return false, nil
+			}
+			if jobDetails[0].JobStatusMessage == "finished" {
+				return true, nil
+			}
+			return false, nil
+		}
+
+		// retrieve flags
+		userpass, _ := cmd.Flags().GetString("user")
+		token, _ := cmd.Flags().GetString("token")
+		jobId, _ := cmd.Flags().GetString("job")
+		testenvFlag, _ := cmd.Flags().GetBool("testenv")
+		devenvFlag, _ := cmd.Flags().GetBool("devenv")
+		showVersion, _ := cmd.Flags().GetBool("version")
+
+		if datasetUtils.TestFlags != nil {
+			datasetUtils.TestFlags(map[string]interface{}{
+				"user":    userpass,
+				"token":   token,
+				"job":     jobId,
+				"testenv": testenvFlag,
+				"devenv":  devenvFlag,
+				"version": showVersion,
+			})
+			return
+		}
+
+		// command
+		if showVersion {
+			fmt.Printf("%s\n", VERSION)
+			return
+		}
+
+		if testenvFlag {
+			APIServer = TEST_API_SERVER
+			env = "test"
+		} else if devenvFlag {
+			APIServer = DEV_API_SERVER
+			env = "dev"
+		} else {
+			APIServer = PROD_API_SERVER
+			env = "production"
+		}
+
+		color.Set(color.FgGreen)
+		log.Printf("You are about to wait for a job to be finished on the === %s === API server...", env)
+		color.Unset()
+
+		if jobId == "" { /* && *datasetId == "" && *ownerGroup == "" */
+			fmt.Println("\n\nTool to wait for job to be finished")
+			fmt.Printf("Run script without arguments, but specify options:\n\n")
+			fmt.Printf("waitForJobFinished [options] \n\n")
+			fmt.Printf("Use the --job option to define the job that should be polled.\n\n")
+			fmt.Printf("For example:\n")
+			fmt.Printf("scicat-cli waitForJobFinished --job ... \n\n")
+			fmt.Print(cmd.Flags().FlagUsages())
+			return
+		}
+
+		auth := &datasetUtils.RealAuthenticator{}
+		user, _ := datasetUtils.Authenticate(auth, client, APIServer, &token, &userpass)
+
+		filter := `{"where":{"id":"` + jobId + `"}}`
+
+		v := url.Values{}
+		v.Set("filter", filter)
+		v.Add("access_token", user["accessToken"])
+
+		var myurl = APIServer + "/Jobs?" + v.Encode()
+
+		timeoutchan := make(chan bool)
+		ticker := time.NewTicker(5 * time.Second)
+		quit := make(chan struct{})
+		go func() {
+			for {
+				select {
+				case <-ticker.C:
+					resp, err := client.Get(myurl)
+					if err != nil {
+						log.Fatal("Get Job failed:", err)
+					}
+					stopPolling, err := handlePollResponse(resp)
+					if stopPolling {
+						if err != nil {
+							fmt.Println(err)
+						} else {
+							fmt.Println("finished")
+						}
+						ticker.Stop()
+						timeoutchan <- true
+					}
+				case <-quit:
+					ticker.Stop()
+					timeoutchan <- true
+				}
+			}
+		}()
+
+		select {
+		case <-timeoutchan:
+			break
+		case <-time.After(time.Hour * 24):
+			break
+		}
+	},
+}
+
+func init() {
+	rootCmd.AddCommand(waitForJobFinishedCmd)
+
+	waitForJobFinishedCmd.Flags().String("user", "", "Defines optional username and password")
+	waitForJobFinishedCmd.Flags().String("token", "", "Defines optional API token instead of username:password")
+	waitForJobFinishedCmd.Flags().String("job", "", "Defines the job id to poll")
+	waitForJobFinishedCmd.Flags().Bool("testenv", false, "Use test environment (qa) instead of production")
+	waitForJobFinishedCmd.Flags().Bool("devenv", false, "Use development environment instead of production")
+	waitForJobFinishedCmd.Flags().Bool("version", false, "Show version number and exit")
+
+	waitForJobFinishedCmd.MarkFlagsMutuallyExclusive("testenv", "devenv")
+}
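The ticker/timeout-channel construction above works, but the same poll-until-finished control flow can be expressed more compactly with a context deadline. A minimal sketch of that alternative (not the code this diff adds; the poll closure stands in for the `GET /Jobs?filter=...` call):

```go
// Sketch: poll every 5 seconds until a condition holds or a 24h deadline expires.
package main

import (
	"context"
	"fmt"
	"time"
)

func pollUntilFinished(ctx context.Context, poll func() (finished bool, err error)) error {
	ticker := time.NewTicker(5 * time.Second)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			return ctx.Err() // deadline exceeded or caller cancelled
		case <-ticker.C:
			finished, err := poll()
			if err != nil {
				return err
			}
			if finished {
				return nil
			}
		}
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 24*time.Hour)
	defer cancel()
	start := time.Now()
	err := pollUntilFinished(ctx, func() (bool, error) {
		// stand-in for handlePollResponse on the Jobs endpoint
		return time.Since(start) > 12*time.Second, nil
	})
	fmt.Println("done:", err)
}
```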
diff --git a/cmd/datasetArchiver/main.go b/cmd/datasetArchiver/main.go
deleted file mode 100644
index 0c945b9..0000000
--- a/cmd/datasetArchiver/main.go
+++ /dev/null
@@ -1,143 +0,0 @@
-/* - -This script archives all datasets in state datasetCreated from a given ownerGroup - -*/ - -package main - -import ( - "bufio" - "crypto/tls" - "flag" - "fmt" - "log" - "net/http" - "os" - "strings" - "time" - - "github.com/paulscherrerinstitute/scicat/datasetUtils" - - "github.com/fatih/color" -) - -var VERSION string - -func main() { - var client = &http.Client{ - Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: false}}, - Timeout: 10 * time.Second} - - const PROD_API_SERVER string = "https://dacat.psi.ch/api/v3" - const TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3" - const DEV_API_SERVER string = "https://dacat-development.psi.ch/api/v3" - const LOCAL_API_SERVER string = "http://localhost:3000/api/v3" - - const MANUAL string = "http://melanie.gitpages.psi.ch/SciCatPages" - const APP = "datasetArchiver" - var scanner = bufio.NewScanner(os.Stdin) - - var APIServer string - var env string - - // pass parameters - userpass := flag.String("user", "", "Defines optional username and password") - token := flag.String("token", "", "Defines optional API token instead of username:password") - tapecopies := flag.Int("tapecopies", 1, "Number of tapecopies to be used for archiving") - testenvFlag := flag.Bool("testenv", false, "Use test environment (qa) instead or production") - localenvFlag := flag.Bool("localenv", false, "Use local environment (local) instead or production") - devenvFlag := flag.Bool("devenv", false, "Use development environment instead or production") - nonInteractiveFlag := flag.Bool("noninteractive", false, "Defines if no questions will be asked, just do it - make sure you know what you are doing") - showVersion := flag.Bool("version", false, "Show version number and exit") - - flag.Parse() - - if datasetUtils.TestFlags != nil { - datasetUtils.TestFlags(map[string]interface{}{ - "user": *userpass, - "token": *token, - "tapecopies": *tapecopies, - "testenv": *testenvFlag, - "localenv": *localenvFlag, - "devenv": *devenvFlag, -
"noninteractive": *nonInteractiveFlag, - "version": *showVersion, - }) - return - } - - if *showVersion { - fmt.Printf("%s\n", VERSION) - return - } - - // check for program version only if running interactively - datasetUtils.CheckForNewVersion(client, APP, VERSION) - - if *testenvFlag { - APIServer = TEST_API_SERVER - env = "test" - } else if *devenvFlag { - APIServer = DEV_API_SERVER - env = "dev" - } else if *localenvFlag { - APIServer = LOCAL_API_SERVER - env = "local" - } else { - APIServer = PROD_API_SERVER - env = "production" - } - - color.Set(color.FgGreen) - log.Printf("You are about to archive dataset(s) to the === %s === data catalog environment...", env) - color.Unset() - - args := flag.Args() - ownerGroup := "" - inputdatasetList := make([]string, 0) - - // argsWithoutProg := os.Args[1:] - if len(args) == 0 { - fmt.Printf("\n\nTool to archive datasets to the data catalog.\n\n") - fmt.Printf("Run script with the following options and parameter:\n\n") - fmt.Printf("datasetArchiver [options] (ownerGroup | space separated list of datasetIds) \n\n") - fmt.Printf("You must choose either an ownerGroup, in which case all archivable datasets\n") - fmt.Printf("of this ownerGroup not yet archived will be archived.\n") - fmt.Printf("Or you choose a (list of) datasetIds, in which case all archivable datasets\n") - fmt.Printf("of this list not yet archived will be archived.\n\n") - fmt.Printf("List of options:\n\n") - flag.PrintDefaults() - fmt.Printf("\n\nFor further help see " + MANUAL + "\n\n") - return - } else if len(args) == 1 && !strings.Contains(args[0], "/") { - ownerGroup = args[0] - } else { - inputdatasetList = args[0:] - } - - auth := &datasetUtils.RealAuthenticator{} - user, _ := datasetUtils.Authenticate(auth, client, APIServer, token, userpass) - - archivableDatasets := datasetUtils.GetArchivableDatasets(client, APIServer, ownerGroup, inputdatasetList, user["accessToken"]) - if len(archivableDatasets) > 0 { - archive := "" - if *nonInteractiveFlag { - archive = "y" - } else { - fmt.Printf("\nDo you want to archive these %v datasets (y/N) ? 
", len(archivableDatasets)) - scanner.Scan() - archive = scanner.Text() - } - if archive != "y" { - log.Fatalf("Okay the archive process is stopped here, no datasets will be archived\n") - } else { - log.Printf("You chose to archive the new datasets\n") - log.Printf("Submitting Archive Job for the ingested datasets.\n") - jobId := datasetUtils.CreateJob(client, APIServer, user, archivableDatasets, tapecopies) - fmt.Println(jobId) - } - } else { - log.Fatalf("No archivable datasets remaining") - } -} diff --git a/cmd/datasetArchiver/main_test.go b/cmd/datasetArchiver/main_test.go deleted file mode 100644 index b9de7c4..0000000 --- a/cmd/datasetArchiver/main_test.go +++ /dev/null @@ -1,82 +0,0 @@ -package main - -import ( - "flag" - "os" - "testing" - - "github.com/paulscherrerinstitute/scicat/datasetUtils" -) - -func TestMainFlags(t *testing.T) { - // test cases - tests := []struct { - name string - flags map[string]interface{} - args []string - }{ - { - name: "Test without flags", - flags: map[string]interface{}{ - "testenv": false, - "devenv": false, - "localenv": false, - "noninteractive": false, - "version": false, - "user": "", - "token": "", - "tapecopies": 1, - }, - args: []string{"test"}, - }, - { - name: "Set all flags", - flags: map[string]interface{}{ - "testenv": true, - "devenv": true, - "localenv": true, - "noninteractive": true, - "version": true, - "user": "usertest:passtest", - "token": "token", - "tapecopies": 6571579, - }, - args: []string{ - "test", - "--testenv", - "--devenv", - "--localenv", - "--noninteractive", - "--user", - "usertest:passtest", - "--token", - "token", - "--tapecopies", - "6571579", - "--version", - }, - }, - } - - // running test cases - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - flag.CommandLine = flag.NewFlagSet(test.name, flag.ExitOnError) - datasetUtils.TestFlags = func(flags map[string]interface{}) { - passing := true - for flag := range test.flags { - if flags[flag] != test.flags[flag] { - t.Logf("%s's value should be \"%v\" but it's \"%v\", or non-matching type", flag, test.flags[flag], flags[flag]) - passing = false - } - } - if !passing { - t.Fail() - } - } - - os.Args = test.args - main() - }) - } -} diff --git a/cmd/datasetCleaner/main.go b/cmd/datasetCleaner/main.go deleted file mode 100644 index 61c4a1e..0000000 --- a/cmd/datasetCleaner/main.go +++ /dev/null @@ -1,146 +0,0 @@ -/* - -Purpose: Remove dataset from archive and optionally from data catalog - -This script must be run by the archiveManager role. - -If Datablock entries exist for a given dataset a reset job will be launched -If the Dataset should be removed from the data catalog as well the corresponding -documents in Dataset and OrigDatablock will be deleted as well. This will only -happen once the reset job is finished. The tool will try to remove the dataset -catalog entries each minute until Dataset is found to be in archivable statet again -and only then will be deleted in the data catalog - -Note: these actions can not be un-done ! 
Be careful - -Call like this: - -datasetCleaner --removeFromCatalog datasetPid - - - Return code (useful for wrapper scripts): - rc=0: command excuted correctly - rc=1: command exited with errors and needs to be repeated - - -*/ - -package main - -import ( - "crypto/tls" - "flag" - "fmt" - "log" - "net/http" - "time" - - "github.com/paulscherrerinstitute/scicat/datasetUtils" - - "github.com/fatih/color" -) - -func isFlagPassed(name string) bool { - found := false - flag.Visit(func(f *flag.Flag) { - if f.Name == name { - found = true - } - }) - return found -} - -var VERSION string - -func main() { - var client = &http.Client{ - Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: false}}, - Timeout: 10 * time.Second} - - const PROD_API_SERVER string = "https://dacat.psi.ch/api/v3" - const TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3" - const DEV_API_SERVER string = "https://dacat-development.psi.ch/api/v3" - - const MANUAL string = "http://melanie.gitpages.psi.ch/SciCatPages" - const APP = "datasetCleaner" - - var APIServer string - var env string - - // pass parameters - removeFromCatalogFlag := flag.Bool("removeFromCatalog", false, "Defines if the dataset should also be deleted from data catalog") - nonInteractiveFlag := flag.Bool("nonInteractive", false, "Defines if no questions will be asked, just do it - make sure you know what you are doing") - testenvFlag := flag.Bool("testenv", false, "Use test environment (qa) instead of production environment") - devenvFlag := flag.Bool("devenv", false, "Use development environment instead of production environment (developers only)") - userpass := flag.String("user", "", "Defines optional username:password string") - token := flag.String("token", "", "Defines optional API token instead of username:password") - showVersion := flag.Bool("version", false, "Show version number and exit") - - flag.Parse() - - if datasetUtils.TestFlags != nil { - datasetUtils.TestFlags(map[string]interface{}{ - "user": *userpass, - "token": *token, - "testenv": *testenvFlag, - "devenv": *devenvFlag, - "nonInteractive": *nonInteractiveFlag, - "removeFromCatalog": *removeFromCatalogFlag, - "version": *showVersion}) - return - } - - if *showVersion { - fmt.Printf("%s\n", VERSION) - return - } - - // check for program version only if running interactively - datasetUtils.CheckForNewVersion(client, APP, VERSION) - datasetUtils.CheckForServiceAvailability(client, *testenvFlag, true) - - //} - - if *testenvFlag { - APIServer = TEST_API_SERVER - env = "test" - } else if *devenvFlag { - APIServer = DEV_API_SERVER - env = "dev" - } else { - APIServer = PROD_API_SERVER - env = "production" - } - - color.Set(color.FgRed) - log.Printf("You are about to remove a dataset from the === %s === data catalog environment...", env) - color.Unset() - - args := flag.Args() - pid := "" - - if len(args) == 1 { - pid = args[0] - } else { - fmt.Printf("\n\nTool to remove datasets from the data catalog.\n\n") - fmt.Printf("Run script with one dataset pid as argument:\n\n") - fmt.Printf("datasetIngestor [options] dataset-PID\n\n") - flag.PrintDefaults() - fmt.Printf("\n\nFor further help see " + MANUAL + "\n\n") - return - } - - auth := &datasetUtils.RealAuthenticator{} - user, _ := datasetUtils.Authenticate(auth, client, APIServer, token, userpass) - - if user["username"] != "archiveManager" { - log.Fatalf("You must be archiveManager to be allowed to delete datasets\n") - } - - datasetUtils.RemoveFromArchive(client, APIServer, pid, user, *nonInteractiveFlag) - - 
if *removeFromCatalogFlag { - datasetUtils.RemoveFromCatalog(client, APIServer, pid, user, *nonInteractiveFlag) - } else { - log.Println("To also delete the dataset from the catalog add the flag -removeFromCatalog") - } -} diff --git a/cmd/datasetCleaner/main_test.go b/cmd/datasetCleaner/main_test.go deleted file mode 100644 index 26be1b3..0000000 --- a/cmd/datasetCleaner/main_test.go +++ /dev/null @@ -1,78 +0,0 @@ -package main - -import ( - "flag" - "os" - "testing" - - "github.com/paulscherrerinstitute/scicat/datasetUtils" -) - -func TestMainFlags(t *testing.T) { - // test cases - tests := []struct { - name string - flags map[string]interface{} - args []string - }{ - { - name: "Test without flags", - flags: map[string]interface{}{ - "testenv": false, - "devenv": false, - "nonInteractive": false, - "removeFromCatalog": false, - "version": false, - "user": "", - "token": "", - }, - args: []string{"test"}, - }, - { - name: "Set all flags", - flags: map[string]interface{}{ - "testenv": true, - "devenv": true, - "nonInteractive": true, - "removeFromCatalog": true, - "version": true, - "user": "usertest:passtest", - "token": "token", - }, - args: []string{ - "test", - "--testenv", - "--devenv", - "--nonInteractive", - "--removeFromCatalog", - "--user", - "usertest:passtest", - "--token", - "token", - "--version", - }, - }, - } - - // running test cases - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - flag.CommandLine = flag.NewFlagSet(test.name, flag.ExitOnError) - datasetUtils.TestFlags = func(flags map[string]interface{}) { - passing := true - for flag := range test.flags { - if flags[flag] != test.flags[flag] { - t.Logf("%s's value should be \"%v\" but it's \"%v\", or non-matching type", flag, test.flags[flag], flags[flag]) - passing = false - } - } - if !passing { - t.Fail() - } - } - - os.Args = test.args - main() - }) - } -} diff --git a/cmd/datasetGetProposal/main.go b/cmd/datasetGetProposal/main.go deleted file mode 100644 index 1d0a769..0000000 --- a/cmd/datasetGetProposal/main.go +++ /dev/null @@ -1,122 +0,0 @@ -/* - -This script returns the proposal information for a given ownerGroup - -*/ - -package main - -import ( - "crypto/tls" - "encoding/json" - "flag" - "fmt" - "log" - "net/http" - "os" - "time" - - "github.com/paulscherrerinstitute/scicat/datasetUtils" - - "github.com/fatih/color" -) - -var VERSION string - -func main() { - var client = &http.Client{ - Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: false}}, - Timeout: 10 * time.Second} - - const PROD_API_SERVER string = "https://dacat.psi.ch/api/v3" - const TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3" - const DEV_API_SERVER string = "https://dacat-development.psi.ch/api/v3" - - const MANUAL string = "http://melanie.gitpages.psi.ch/SciCatPages" - const APP = "datasetGetProposal" - - var APIServer string - var env string - - // pass parameters - userpass := flag.String("user", "", "Defines optional username and password") - token := flag.String("token", "", "Defines optional API token instead of username:password") - fieldname := flag.String("field", "", "Defines optional field name , whose value should be returned instead of full information") - testenvFlag := flag.Bool("testenv", false, "Use test environment (qa) instead or production") - devenvFlag := flag.Bool("devenv", false, "Use development environment instead or production") - showVersion := flag.Bool("version", false, "Show version number and exit") - - flag.Parse() - - // flag testing only - if 
datasetUtils.TestFlags != nil { - datasetUtils.TestFlags(map[string]interface{}{ - "user": *userpass, - "token": *token, - "field": *fieldname, - "testenv": *testenvFlag, - "devenv": *devenvFlag, - "version": *showVersion}) - return - } - - if *showVersion { - fmt.Printf("%s\n", VERSION) - return - } - - // check for program version only if running interactively - datasetUtils.CheckForNewVersion(client, APP, VERSION) - - if *testenvFlag { - APIServer = TEST_API_SERVER - env = "test" - } else if *devenvFlag { - APIServer = DEV_API_SERVER - env = "dev" - } else { - APIServer = PROD_API_SERVER - env = "production" - } - - color.Set(color.FgGreen) - log.Printf("You are about to retrieve the proposal information from the === %s === data catalog environment...", env) - color.Unset() - - args := flag.Args() - ownerGroup := "" - - //TODO cleanup text formatting: - if len(args) == 1 { - ownerGroup = args[0] - } else { - fmt.Printf("\n\nTool to retrieve proposal information for a given ownerGroup.\n\n") - fmt.Printf("Run script with the following options and parameter:\n\n") - fmt.Printf("datasetGetProposal [options] ownerGroup\n\n") - flag.PrintDefaults() - fmt.Printf("\n\nFor further help see " + MANUAL + "\n\n") - return - } - - auth := &datasetUtils.RealAuthenticator{} - user, accessGroups := datasetUtils.Authenticate(auth, client, APIServer, token, userpass) - proposal, err := datasetUtils.GetProposal(client, APIServer, ownerGroup, user, accessGroups) - if err != nil { - log.Fatalf("Error: %v\n", err) - } - - // proposal is of type map[string]interface{} - - if len(proposal) > 0 { - if *fieldname != "" { - fmt.Println(proposal[*fieldname]) - } else { - pretty, _ := json.MarshalIndent(proposal, "", " ") - fmt.Printf("%s\n", pretty) - } - os.Exit(0) - } else { - log.Printf("No Proposal information found for group %v\n", ownerGroup) - os.Exit(1) - } -} diff --git a/cmd/datasetGetProposal/main_test.go b/cmd/datasetGetProposal/main_test.go deleted file mode 100644 index 1d7d282..0000000 --- a/cmd/datasetGetProposal/main_test.go +++ /dev/null @@ -1,76 +0,0 @@ -package main - -import ( - "flag" - "os" - "testing" - - "github.com/paulscherrerinstitute/scicat/datasetUtils" -) - -func TestMainFlags(t *testing.T) { - // test cases - tests := []struct { - name string - flags map[string]interface{} - args []string - }{ - { - name: "Test without flags", - flags: map[string]interface{}{ - "testenv": false, - "devenv": false, - "version": false, - "user": "", - "token": "", - "field": "", - }, - args: []string{"test"}, - }, - { - name: "Set all flags", - flags: map[string]interface{}{ - "testenv": true, - "devenv": true, - "version": true, - "user": "usertest:passtest", - "token": "token", - "field": "some field", - }, - args: []string{ - "test", - "--testenv", - "--devenv", - "--user", - "usertest:passtest", - "--token", - "token", - "--field", - "some field", - "--version", - }, - }, - } - - // running test cases - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - flag.CommandLine = flag.NewFlagSet(test.name, flag.ExitOnError) - datasetUtils.TestFlags = func(flags map[string]interface{}) { - passing := true - for flag := range test.flags { - if flags[flag] != test.flags[flag] { - t.Logf("%s's value should be \"%v\" but it's \"%v\", or non-matching type", flag, test.flags[flag], flags[flag]) - passing = false - } - } - if !passing { - t.Fail() - } - } - - os.Args = test.args - main() - }) - } -} diff --git a/cmd/datasetIngestor/main.go b/cmd/datasetIngestor/main.go deleted file mode 
100644 index 07ea95b..0000000 --- a/cmd/datasetIngestor/main.go +++ /dev/null @@ -1,430 +0,0 @@ -/* - -Purpose: define and add a dataset to the SciCat datacatalog - -This script must be run on the machine having access to the data which comprises the dataset -It takes one or two input files and creates the necessary messages which trigger the creation -of the corresponding datacatalog entries - - Input files - - - metadata.json: the metadata going with the data. The structure of the meta data - depends on the type of the dataset (raw, derived, base) - It must have a type and a sourceFolder field defined - - either name of filelisting file: contains list of files and folders which belong to the dataset. - In the simplest case this is just the path to a single file or folder - (TODO optionally: add exclusion regexp for file not to be included) - All paths are relative to the sourceFolder defined inside the metadata.json files - - or "folderlisting.txt": (implies empty filelisting, i.e all files in folders): contains list of - sourceFolders as absolute path names, for each a dataset is created with the metadata defined above - and only the sourceFolder field being substituted - - Output: - - Optionally a copy of the data on a central rsync server if data is not stored on central system - - Entries in the data catalog created via the dacat API - - a new (Raw/Derived)Dataset entry - - the origDataBlocks entries - - optionally a new job if autoarchive is requested - - - Return code (useful for wrapper scripts): - rc=0: command excuted correctly - rc=1: command exited with errors and needs to be repeated - - Note: the log.Fatal function calls os.exit(1) already - -*/ - -package main - -import ( - "bufio" - "crypto/tls" - "encoding/json" - "flag" - "fmt" - "log" - "net/http" - "os" - "path/filepath" - "strings" - "time" - - "github.com/paulscherrerinstitute/scicat/datasetIngestor" - "github.com/paulscherrerinstitute/scicat/datasetUtils" - - "github.com/fatih/color" -) - -const TOTAL_MAXFILES = 400000 - -func isFlagPassed(name string) bool { - found := false - flag.Visit(func(f *flag.Flag) { - if f.Name == name { - found = true - } - }) - return found -} - -var VERSION string - -func main() { - var tooLargeDatasets = 0 - var emptyDatasets = 0 - - var originalMap = make(map[string]string) - - var client = &http.Client{ - Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: false}}, - Timeout: 120 * time.Second} - - const PROD_API_SERVER string = "https://dacat.psi.ch/api/v3" - const TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3" - const DEV_API_SERVER string = "https://dacat-development.psi.ch/api/v3" - const LOCAL_API_SERVER string = "http://localhost:3000/api/v3" - const TUNNEL_API_SERVER string = "https://dacat-development.psi.ch:5443/api/v3" - - const PROD_RSYNC_ARCHIVE_SERVER string = "pb-archive.psi.ch" - const TEST_RSYNC_ARCHIVE_SERVER string = "pbt-archive.psi.ch" - const DEV_RSYNC_ARCHIVE_SERVER string = "arematest2in.psi.ch" - const LOCAL_RSYNC_ARCHIVE_SERVER string = "localhost" - const TUNNEL_RSYNC_ARCHIVE_SERVER string = "arematest2in.psi.ch:2022" - - // const PROD_RSYNC_ARCHIVE_SERVER string = "ebarema2in.psi.ch" - // const TEST_RSYNC_ARCHIVE_SERVER string = "ebaremat1in.psi.ch" - // const DEV_RSYNC_ARCHIVE_SERVER string = "arematest2in.psi.ch" - - const MANUAL string = "http://melanie.gitpages.psi.ch/SciCatPages" - const APP = "datasetIngestor" - - var scanner = bufio.NewScanner(os.Stdin) - var APIServer string - var RSYNCServer string - var env 
string - - // pass parameters - ingestFlag := flag.Bool("ingest", false, "Defines if this command is meant to actually ingest data") - testenvFlag := flag.Bool("testenv", false, "Use test environment (qa) instead of production environment") - devenvFlag := flag.Bool("devenv", false, "Use development environment instead of production environment (developers only)") - localenvFlag := flag.Bool("localenv", false, "Use local environment instead of production environment (developers only)") - tunnelenvFlag := flag.Bool("tunnelenv", false, "Use tunneled API server at port 5443 to access development instance (developers only)") - noninteractiveFlag := flag.Bool("noninteractive", false, "If true, no questions will be asked and the default settings for all undefined flags will be assumed") - userpass := flag.String("user", "", "Defines optional username:password string. This can be used both for access to the data catalog API and for access to the intermediate storage server for the decentral use case") - token := flag.String("token", "", "Defines API token for access to the data catalog API. It is now mandatory for normal user accounts, but optional for functional accounts. It takes precedence over username/pw.") - copyFlag := flag.Bool("copy", false, "Defines if files should be copied from your local system to a central server before ingest (i.e. your data is not centrally available and therefore needs to be copied ='decentral' case). copyFlag has higher priority than nocopyFlag. If neither flag is defined the tool will try to make the best guess.") - nocopyFlag := flag.Bool("nocopy", false, "Defines if files should *not* be copied from your local system to a central server before ingest (i.e. your data is centrally available and therefore does not need to be copied ='central' case).") - tapecopies := flag.Int("tapecopies", 0, "Number of tapecopies to be used for archiving") - autoarchiveFlag := flag.Bool("autoarchive", false, "Option to create archive job automatically after ingestion") - linkfiles := flag.String("linkfiles", "keepInternalOnly", "Define what to do with symbolic links: (keep|delete|keepInternalOnly)") - allowExistingSourceFolder := flag.Bool("allowexistingsource", false, "Defines if existing sourceFolders can be reused") - addAttachment := flag.String("addattachment", "", "Filename of image to attach (single dataset case only)") - addCaption := flag.String("addcaption", "", "Optional caption to be stored with attachment (single dataset case only)") - showVersion := flag.Bool("version", false, "Show version number and exit") - - flag.Parse() - - if datasetUtils.TestFlags != nil { - datasetUtils.TestFlags( - map[string]interface{}{ - "ingest": *ingestFlag, - "testenv": *testenvFlag, - "devenv": *devenvFlag, - "localenv": *localenvFlag, - "tunnelenv": *tunnelenvFlag, - "noninteractive": *noninteractiveFlag, - "user": *userpass, - "token": *token, - "copy": *copyFlag, - "nocopy": *nocopyFlag, - "tapecopies": *tapecopies, - "autoarchive": *autoarchiveFlag, - "linkfiles": *linkfiles, - "allowexistingsource": *allowExistingSourceFolder, - "addattachment": *addAttachment, - "addcaption": *addCaption, - "version": *showVersion, - }) - return - } - - // to distinguish between defined and undefined flags needed if interactive questions askes - if !*noninteractiveFlag { - if !isFlagPassed("linkfiles") { - linkfiles = nil - } - if !isFlagPassed("allowexistingsource") { - allowExistingSourceFolder = nil - } - } - - if *showVersion { - fmt.Printf("%s\n", VERSION) - return - } - - // check for 
program version only if running interactively - datasetUtils.CheckForNewVersion(client, APP, VERSION) - datasetUtils.CheckForServiceAvailability(client, *testenvFlag, *autoarchiveFlag) - - //} - - if *testenvFlag { - APIServer = TEST_API_SERVER - RSYNCServer = TEST_RSYNC_ARCHIVE_SERVER - env = "test" - } else if *devenvFlag { - APIServer = DEV_API_SERVER - RSYNCServer = DEV_RSYNC_ARCHIVE_SERVER - env = "dev" - } else if *localenvFlag { - APIServer = LOCAL_API_SERVER - RSYNCServer = LOCAL_RSYNC_ARCHIVE_SERVER - env = "local" - } else if *tunnelenvFlag { - APIServer = TUNNEL_API_SERVER - RSYNCServer = TUNNEL_RSYNC_ARCHIVE_SERVER - env = "dev" - } else { - APIServer = PROD_API_SERVER - RSYNCServer = PROD_RSYNC_ARCHIVE_SERVER - env = "production" - } - - color.Set(color.FgGreen) - log.Printf("You are about to add a dataset to the === %s === data catalog environment...", env) - color.Unset() - - args := flag.Args() - metadatafile := "" - filelistingPath := "" - folderlistingPath := "" - absFileListing := "" - - if len(args) == 1 { - metadatafile = args[0] - } else if len(args) == 2 { - metadatafile = args[0] - if args[1] == "folderlisting.txt" { - folderlistingPath = args[1] - } else { - filelistingPath = args[1] - absFileListing, _ = filepath.Abs(filelistingPath) - } - } else { - fmt.Printf("\n\nTool to ingest datasets to the data catalog.\n\n") - fmt.Printf("Run script with either 1 or 2 arguments:\n\n") - fmt.Printf("datasetIngestor [options] metadata-file [filelisting-file|'folderlisting.txt']\n\n") - flag.PrintDefaults() - fmt.Printf("\n\nFor further help see " + MANUAL + "\n") - fmt.Printf("\nSpecial hints for the decentral use case, where data is copied first to intermediate storage:\n") - fmt.Printf("For Linux you need to have a valid Kerberos tickets, which you can get via the kinit command.\n") - fmt.Printf("For Windows you need instead to specify -user username:password on the command line.\n") - return - } - - auth := &datasetUtils.RealAuthenticator{} - user, accessGroups := datasetUtils.Authenticate(auth, client, APIServer, token, userpass) - - /* TODO Add info about policy settings and that autoarchive will take place or not */ - - metaDataMap, sourceFolder, beamlineAccount, err := datasetIngestor.CheckMetadata(client, APIServer, metadatafile, user, accessGroups) - if err != nil { - log.Fatal("Error in CheckMetadata function: ", err) - } - //log.Printf("metadata object: %v\n", metaDataMap) - - // assemble list of folders (=datasets) to created - var folders []string - if folderlistingPath == "" { - folders = append(folders, sourceFolder) - } else { - // get folders from file - folderlist, err := os.ReadFile(folderlistingPath) - if err != nil { - log.Fatal(err) - } - lines := strings.Split(string(folderlist), "\n") - // remove all empty and comment lines - for _, sourceFolder := range lines { - if sourceFolder != "" && string(sourceFolder[0]) != "#" { - // convert into canonical form only for certain online data linked from eaccounts home directories - var parts = strings.Split(sourceFolder, "/") - if len(parts) > 3 && parts[3] == "data" { - realSourceFolder, err := filepath.EvalSymlinks(sourceFolder) - if err != nil { - log.Fatalf("Failed to find canonical form of sourceFolder:%v %v", sourceFolder, err) - } - color.Set(color.FgYellow) - log.Printf("Transform sourceFolder %v to canonical form: %v", sourceFolder, realSourceFolder) - color.Unset() - folders = append(folders, realSourceFolder) - } else { - folders = append(folders, sourceFolder) - } - } - } - } - // 
log.Printf("Selected folders: %v\n", folders) - - // test if a sourceFolder already used in the past and give warning - datasetIngestor.TestForExistingSourceFolder(folders, client, APIServer, user["accessToken"], allowExistingSourceFolder) - - // TODO ask archive system if sourcefolder is known to them. If yes no copy needed, otherwise - // a destination location is defined by the archive system - // for now let the user decide if he needs a copy - - // now everything is prepared, start to loop over all folders - var skip = "" - var datasetList []string - for _, sourceFolder := range folders { - // ignore empty lines - if sourceFolder == "" { - continue - } - metaDataMap["sourceFolder"] = sourceFolder - - log.Printf("Scanning files in dataset %s", sourceFolder) - - // check if skip flag is globally defined via flags: - if linkfiles != nil { - skip = "dA" // default behaviour = keep internal for all - if *linkfiles == "delete" { - skip = "sA" - } else if *linkfiles == "keep" { - skip = "kA" - } - } - - fullFileArray, startTime, endTime, owner, numFiles, totalSize := - datasetIngestor.AssembleFilelisting(sourceFolder, filelistingPath, &skip) - //log.Printf("full fileListing: %v\n Start and end time: %s %s\n ", fullFileArray, startTime, endTime) - log.Printf("The dataset contains %v files with a total size of %v bytes.", numFiles, totalSize) - - if totalSize == 0 { - emptyDatasets++ - color.Set(color.FgRed) - log.Println("This dataset contains no files and will therefore NOT be stored. ") - color.Unset() - } else if numFiles > TOTAL_MAXFILES { - tooLargeDatasets++ - color.Set(color.FgRed) - log.Printf("This dataset exceeds the current filecount limit of the archive system of %v files and will therefore NOT be stored.\n", TOTAL_MAXFILES) - color.Unset() - } else { - datasetIngestor.UpdateMetaData(client, APIServer, user, originalMap, metaDataMap, startTime, endTime, owner, tapecopies) - pretty, _ := json.MarshalIndent(metaDataMap, "", " ") - - log.Printf("Updated metadata object:\n%s\n", pretty) - - // check if data is accesible at archive server, unless beamline account (assumed to be centrally available always) - // and unless copy flag defined via command line - if !*copyFlag && !*nocopyFlag { - if !beamlineAccount { - err := datasetIngestor.CheckDataCentrallyAvailable(user["username"], RSYNCServer, sourceFolder) - if err != nil { - color.Set(color.FgYellow) - log.Printf("The source folder %v is not centrally available (decentral use case).\nThe data must first be copied to a rsync cache server.\n ", sourceFolder) - color.Unset() - *copyFlag = true - // check if user account - if len(accessGroups) == 0 { - color.Set(color.FgRed) - log.Println("For the decentral case you must use a personal account. Beamline accounts are not supported.") - color.Unset() - os.Exit(1) - } - if !*noninteractiveFlag { - log.Printf("Do you want to continue (Y/n)? ") - scanner.Scan() - continueFlag := scanner.Text() - if continueFlag == "n" { - log.Fatalln("Further ingests interrupted because decentral case detected, but no copy wanted.") - } - } - } - } else { - *copyFlag = false - } - } else { - if !*copyFlag { - *copyFlag = !*nocopyFlag - } - } - if *ingestFlag { - // create ingest . 
For decentral case delay setting status to archivable until data is copied - archivable := false - if _, ok := metaDataMap["datasetlifecycle"]; !ok { - metaDataMap["datasetlifecycle"] = map[string]interface{}{} - } - if *copyFlag { - // do not override existing fields - metaDataMap["datasetlifecycle"].(map[string]interface{})["isOnCentralDisk"] = false - metaDataMap["datasetlifecycle"].(map[string]interface{})["archiveStatusMessage"] = "filesNotYetAvailable" - metaDataMap["datasetlifecycle"].(map[string]interface{})["archivable"] = archivable - } else { - archivable = true - metaDataMap["datasetlifecycle"].(map[string]interface{})["isOnCentralDisk"] = true - metaDataMap["datasetlifecycle"].(map[string]interface{})["archiveStatusMessage"] = "datasetCreated" - metaDataMap["datasetlifecycle"].(map[string]interface{})["archivable"] = archivable - } - datasetId := datasetIngestor.SendIngestCommand(client, APIServer, metaDataMap, fullFileArray, user) - // add attachment optionally - if *addAttachment != "" { - datasetIngestor.AddAttachment(client, APIServer, datasetId, metaDataMap, user["accessToken"], *addAttachment, *addCaption) - } - if *copyFlag { - err := datasetIngestor.SyncDataToFileserver(datasetId, user, RSYNCServer, sourceFolder, absFileListing) - if err == nil { - // delayed enabling - archivable = true - datasetIngestor.SendFilesReadyCommand(client, APIServer, datasetId, user) - } else { - color.Set(color.FgRed) - log.Printf("The command to copy files exited with error %v \n", err) - log.Printf("The dataset %v is not yet in an archivable state\n", datasetId) - // TODO let user decide to delete dataset entry - // datasetIngestor.DeleteDatasetEntry(client, APIServer, datasetId, user["accessToken"]) - color.Unset() - } - } - - if archivable { - datasetList = append(datasetList, datasetId) - } - } - datasetIngestor.ResetUpdatedMetaData(originalMap, metaDataMap) - } - } - - if !*ingestFlag { - color.Set(color.FgRed) - log.Printf("Note: you run in 'dry' mode to simply to check data consistency. Use the --ingest flag to really ingest datasets.") - } - - if emptyDatasets > 0 { - color.Set(color.FgRed) - log.Printf("Number of datasets not stored because they are empty:%v\n. Please note that this will cancel any subsequent archive steps from this job !\n", emptyDatasets) - } - if tooLargeDatasets > 0 { - color.Set(color.FgRed) - log.Printf("Number of datasets not stored because of too many files:%v\nPlease note that this will cancel any subsequent archive steps from this job !\n", tooLargeDatasets) - } - color.Unset() - datasetIngestor.PrintFileInfos() - - // stop here if empty datasets appeared - if emptyDatasets > 0 || tooLargeDatasets > 0 { - os.Exit(1) - } - // start archive job - if *autoarchiveFlag && *ingestFlag { - log.Printf("Submitting Archive Job for the ingested datasets.\n") - datasetUtils.CreateJob(client, APIServer, user, datasetList, tapecopies) - } - - // print out results to STDOUT, one line per dataset - for i := 0; i < len(datasetList); i++ { - fmt.Println(datasetList[i]) - } -} diff --git a/cmd/datasetIngestor/main_test.go b/cmd/datasetIngestor/main_test.go deleted file mode 100644 index ea5748b..0000000 --- a/cmd/datasetIngestor/main_test.go +++ /dev/null @@ -1,175 +0,0 @@ -package main - -import ( - "bytes" - "flag" - "os" - "testing" - - "github.com/paulscherrerinstitute/scicat/datasetUtils" -) - -// TestMainOutput is a test function that verifies the output of the main function. 
-// It captures the stdout, runs the main function, and checks if the output contains the expected strings. -// This just checks if the main function prints the help message. -func TestMainOutput(t *testing.T) { - flag.CommandLine = flag.NewFlagSet("flag output", flag.ContinueOnError) - os.Args = []string{"test"} - - os.Setenv("TEST_MODE", "true") - oldTestMode := "false" - defer os.Setenv("TEST_MODE", oldTestMode) - // Capture stdout - // The variable 'oldO' stores the original value of the standard output (os.Stdout). - oldO := os.Stdout - oldE := os.Stderr - // rO is a ReadCloser that represents the read end of the pipe. - // wO is a WriteCloser that represents the write end of the pipe. - // err is an error variable. - // The os.Pipe() function in Go is used to create a synchronous in-memory pipe. It can be used for communication between different parts of the program. - // The `os.Pipe()` function in Go is used to create a synchronous in-memory pipe. It can be used for communication between different parts of your program. - // This function returns two values: a `*os.File` for reading and a `*os.File` for writing. When you write data to the write end of the pipe, it becomes available to read from the read end of the pipe. This can be useful for passing data between goroutines or between different parts of your program without using the disk. - rO, wO, errO := os.Pipe() - _, wE, errE := os.Pipe() - if errO != nil || errE != nil { - // The Fatalf method is similar to log.Fatalf or fmt.Printf in that it formats a string according to a format specifier and arguments, then logs that string as an error message. However, in addition to this, Fatalf also ends the test immediately. No further code in the test function will be executed, and the test will be marked as failed. - t.Fatalf("Could not start the test. Error in reading the file: %v", errO) - } - // redirect the standard output (os.Stdout) to a different destination, represented by w. - // also redirect stderr to hide it only - // By default, anything written to os.Stdout will be printed to the terminal. - // The w in this line of code is expected to be a value that satisfies the io.Writer interface, which means it has a Write method. This could be a file, a buffer, a network connection, or any other type of destination for output. - // Since w is connected to r, anything written to w can be read from r. This is how we will capture the output of the main function. - os.Stdout = wO - os.Stderr = wE - - // Run main function (assuming your main function does not take any arguments) - main() - - // Restore stdout & stderr after running main - os.Stdout = oldO - os.Stderr = oldE - - // Close pipe writer to flush the output - wO.Close() - wE.Close() - - //declares a variable named buf of type bytes.Buffer. The bytes.Buffer type is a struct provided by the Go standard library that implements the io.Reader and io.Writer interfaces. - var buf bytes.Buffer - // Copy pipe reader output to buf - // ReadFrom reads data from the given reader r and writes it to the buffer buf. - // It returns the number of bytes read and any error encountered. 
- _, err := buf.ReadFrom(rO) - if err != nil { - t.Fatalf("Error reading output: %v", err) - } - - // Check if the output contains expected strings - expected := "\n\nTool to ingest datasets to the data catalog.\n\n" - if !bytes.Contains(buf.Bytes(), []byte(expected)) { - t.Errorf("Expected output %q not found in %q", expected, buf.String()) - } -} - -func TestMainFlags(t *testing.T) { - // test cases - tests := []struct { - name string - flags map[string]interface{} - args []string - }{ - { - name: "Test without flags", - flags: map[string]interface{}{ - "ingest": false, - "testenv": false, - "devenv": false, - "localenv": false, - "tunnelenv": false, - "noninteractive": false, - "copy": false, - "nocopy": false, - "autoarchive": false, - "allowexistingsource": false, - "version": false, - "user": "", - "token": "", - "linkfiles": "keepInternalOnly", - "addattachment": "", - "addcaption": "", - "tapecopies": 0, - }, - args: []string{"test"}, - }, - { - name: "Set all flags", - flags: map[string]interface{}{ - "ingest": true, - "testenv": true, - "devenv": true, - "localenv": true, - "tunnelenv": true, - "noninteractive": true, - "copy": true, - "nocopy": true, - "autoarchive": true, - "allowexistingsource": true, - "version": true, - "user": "usertest:passtest", - "token": "token", - "linkfiles": "somerandomstring", - "addattachment": "random attachment string", - "addcaption": "a seemingly random caption", - "tapecopies": 6571579, - }, - args: []string{ - "test", - "--ingest", - "--testenv", - "--devenv", - "--localenv", - "--tunnelenv", - "--noninteractive", - "--user", - "usertest:passtest", - "--token", - "token", - "--copy", - "--nocopy", - "--tapecopies", - "6571579", - "--autoarchive", - "--linkfiles", - "somerandomstring", - "--allowexistingsource", - "--addattachment", - "random attachment string", - "--addcaption", - "a seemingly random caption", - "--version", - }, - }, - } - - // running test cases - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - flag.CommandLine = flag.NewFlagSet(test.name, flag.ExitOnError) - datasetUtils.TestFlags = func(flags map[string]interface{}) { - passing := true - for flag := range test.flags { - if flags[flag] != test.flags[flag] { - t.Logf("%s's value should be \"%v\" but it's \"%v\", or non-matching type", flag, test.flags[flag], flags[flag]) - passing = false - } - } - if !passing { - t.Fail() - } - } - - os.Args = test.args - main() - }) - } -} diff --git a/cmd/datasetIngestor/metadata.json b/cmd/datasetIngestor/metadata.json deleted file mode 100644 index 3f69f39..0000000 --- a/cmd/datasetIngestor/metadata.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "creationLocation": "/PSI/SLS/CSAXS", - "datasetName": "CMakeCache", - "description": "", - "owner": "Ana Diaz", - "ownerEmail": "ana.diaz@psi.ch", - "ownerGroup": "p17301", - "principalInvestigator": "ana.diaz@psi.ch", - "scientificMetadata": [ - { - "sample": { - "description": "", - "name": "", - "principalInvestigator": "" - } - } - ], - "sourceFolder": "/usr/share/gnome", - "type": "raw" -} diff --git a/cmd/datasetPublishData/downloadPage.html b/cmd/datasetPublishData/downloadPage.html deleted file mode 100644 index 4707aab..0000000 --- a/cmd/datasetPublishData/downloadPage.html +++ /dev/null @@ -1,35 +0,0 @@ - - -
- [35 deleted lines: HTML template for the published-data download page; its markup is garbled in this excerpt.
-  Recoverable content: instructions to install wget, cd into a destination folder with enough capacity, and run
-  "wget -m -np {{.}}" for each entry of .BrowseUrls (with size {{index $.SizeArray $i}} and file count
-  {{index $.NumFilesArray $i}}); a note that repeating the wget command after an interruption fetches only files
-  not yet downloaded; "Cite as DOI: {{.Doi}}"; and a Creative Commons Attribution-ShareAlike 4.0 International
-  License notice.]