From 02761eb97d0ea2a0e32f5e028fc68ea0e91c531a Mon Sep 17 00:00:00 2001 From: "Ali R. Vahdati" Date: Mon, 26 Feb 2024 16:39:48 +0100 Subject: [PATCH 1/9] Add test - not passing --- datasetIngestor/checkMetadata_test.go | 102 ++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 datasetIngestor/checkMetadata_test.go diff --git a/datasetIngestor/checkMetadata_test.go b/datasetIngestor/checkMetadata_test.go new file mode 100644 index 0000000..22c0a35 --- /dev/null +++ b/datasetIngestor/checkMetadata_test.go @@ -0,0 +1,102 @@ +package datasetIngestor + +import ( + "encoding/json" + "io/ioutil" + "io" + "net/http" + "strings" + "testing" + "os" +) + +func TestGetHost(t *testing.T) { + host := getHost() + + if len(host) == 0 { + t.Errorf("getHost() returned an empty string") + } + + if host == "unknown" { + t.Errorf("getHost() was unable to get the hostname") + } +} + +func TestCheckMetadata(t *testing.T) { + // Mock HTTP client + mockClient := &http.Client{ + Transport: RoundTripFunc(func(req *http.Request) *http.Response { + // Prepare a mock response for the HTTP client + return &http.Response{ + StatusCode: 200, + Body: io.NopCloser(strings.NewReader(`{"valid":true}`)), + Header: make(http.Header), + } + }), + } + + // Mock user map + mockUser := map[string]string{ + "displayName": "testuser", + "mail": "testuser@example.com", + } + + // Mock access groups + mockAccessGroups := []string{"group1", "group2"} + + // Mock metadata file content + mockMetadata := map[string]interface{}{ + "type": "raw", + // Add other required fields as needed for testing + } + + // Convert metadata to JSON + mockMetadataJSON, err := json.Marshal(mockMetadata) + if err != nil { + t.Fatalf("Error marshaling mock metadata: %v", err) + } + + // Create a temporary file for mock metadata + tmpfile, err := ioutil.TempFile("", "mockmetadata.json") + if err != nil { + t.Fatalf("Error creating temporary file: %v", err) + } + defer tmpfile.Close() + defer func() { + // Clean up temporary file + if err := tmpfile.Close(); err != nil { + t.Fatalf("Error closing temporary file: %v", err) + } + if err := os.Remove(tmpfile.Name()); err != nil { + t.Fatalf("Error removing temporary file: %v", err) + } + }() + + // Write mock metadata JSON to the temporary file + if _, err := tmpfile.Write(mockMetadataJSON); err != nil { + t.Fatalf("Error writing mock metadata to temporary file: %v", err) + } + + // Call the function with mock parameters + metaDataMap, sourceFolder, beamlineAccount := CheckMetadata(mockClient, "http://example.com/api", tmpfile.Name(), mockUser, mockAccessGroups) + + // Add assertions here based on the expected behavior of the function + // For example: + if len(metaDataMap) == 0 { + t.Error("Expected non-empty metadata map") + } + if sourceFolder == "" { + t.Error("Expected non-empty source folder") + } + if !beamlineAccount { + t.Error("Expected beamline account to be true") + } +} + +// RoundTripFunc type is a custom implementation of http.RoundTripper +type RoundTripFunc func(req *http.Request) *http.Response + +// RoundTrip executes a single HTTP transaction, returning a Response for the provided Request. +func (f RoundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) { + return f(req), nil +} From 629d0724207b9e61aee7f7e51aad799d1a51fcea Mon Sep 17 00:00:00 2001 From: "Ali R. Vahdati" <3798865+kavir1698@users.noreply.github.com> Date: Fri, 22 Mar 2024 13:21:18 +0100 Subject: [PATCH 2/9] Add comments and testdata dir --- datasetIngestor/checkMetadata.go | 32 ++++++++++++++------ datasetIngestor/checkMetadata_test.go | 4 +++ datasetIngestor/testdata/metadata-short.json | 8 +++++ datasetIngestor/testdata/metadata.json | 20 ++++++++++++ 4 files changed, 54 insertions(+), 10 deletions(-) create mode 100644 datasetIngestor/testdata/metadata-short.json create mode 100644 datasetIngestor/testdata/metadata.json diff --git a/datasetIngestor/checkMetadata.go b/datasetIngestor/checkMetadata.go index d9fc553..ff2da94 100644 --- a/datasetIngestor/checkMetadata.go +++ b/datasetIngestor/checkMetadata.go @@ -18,7 +18,10 @@ import ( const DUMMY_TIME = "2300-01-01T11:11:11.000Z" const DUMMY_OWNER = "x12345" +// getHost is a function that attempts to retrieve and return the fully qualified domain name (FQDN) of the current host. +// If it encounters any error during the process, it gracefully falls back to returning the simple hostname or "unknown". func getHost() string { + // Try to get the hostname of the current machine. hostname, err := os.Hostname() if err != nil { return "unknown" @@ -46,27 +49,34 @@ func getHost() string { return hostname } -func CheckMetadata(client *http.Client, APIServer string, metadatafile string, user map[string]string, - accessGroups []string) (metaDataMap map[string]interface{}, sourceFolder string, beamlineAccount bool) { +// CheckMetadata is a function that validates and augments metadata for a dataset. +// It takes an HTTP client, an API server URL, a metadata file path, a user map, and a list of access groups as input. +// It returns a map of metadata, a source folder string, and a boolean indicating whether the dataset belongs to a beamline account. +func CheckMetadata(client *http.Client, APIServer string, metadatafile string, user map[string]string, accessGroups []string) (metaDataMap map[string]interface{}, sourceFolder string, beamlineAccount bool) { - // read full meta data + // Read the full metadata from the file. b, err := ioutil.ReadFile(metadatafile) // just pass the file name if err != nil { log.Fatal(err) } - var metadataObj interface{} + // Unmarshal the JSON metadata into an interface{} object. + var metadataObj interface{} // Using interface{} allows metadataObj to hold any type of data, since it has no defined methods. err = json.Unmarshal(b, &metadataObj) if err != nil { log.Fatal(err) } - // use type assertion to access f's underlying map - metaDataMap = metadataObj.(map[string]interface{}) + + // Use type assertion to convert the interface{} object to a map[string]interface{}. + metaDataMap = metadataObj.(map[string]interface{}) // `.(` is type assertion: a way to extract the underlying value of an interface and check whether it's of a specific type. beamlineAccount = false + // If the user is not the ingestor, check whether any of the accessGroups equal the ownerGroup. Otherwise, check for beamline-specific accounts. if user["displayName"] != "ingestor" { - if ownerGroup, ok := metaDataMap["ownerGroup"]; ok { + // Check if the metadata contains the "ownerGroup" key. + if ownerGroup, ok := metaDataMap["ownerGroup"]; ok { // type assertion with a comma-ok idiom validOwner := false + // Iterate over accessGroups to validate the owner group. for _, b := range accessGroups { if b == ownerGroup { validOwner = true @@ -76,13 +86,15 @@ func CheckMetadata(client *http.Client, APIServer string, metadatafile string, u if validOwner { log.Printf("OwnerGroup information %s verified successfully.\n", ownerGroup) } else { - // check for beamline specific account if raw data + // If the owner group is not valid, check for beamline-specific accounts. if creationLocation, ok := metaDataMap["creationLocation"]; ok { + // Split the creationLocation string to extract beamline-specific information. parts := strings.Split(creationLocation.(string), "/") expectedAccount := "" if len(parts) == 4 { expectedAccount = strings.ToLower(parts[2]) + strings.ToLower(parts[3]) } + // If the user matches the expected beamline account, grant ingest access. if user["displayName"] == expectedAccount { log.Printf("Beamline specific dataset %s - ingest granted.\n", expectedAccount) beamlineAccount = true @@ -91,7 +103,7 @@ func CheckMetadata(client *http.Client, APIServer string, metadatafile string, u } } else { // for other data just check user name - // this is a quick and dirty test. Should be replaced by test for "globalaccess" role + // this is a quick and dirty test. Should be replaced by test for "globalaccess" role. TODO // facilities: ["SLS", "SINQ", "SWISSFEL", "SmuS"], u := user["displayName"] if strings.HasPrefix(u, "sls") || @@ -131,7 +143,7 @@ func CheckMetadata(client *http.Client, APIServer string, metadatafile string, u log.Printf("sourceFolderHost field added: %s", metaDataMap["sourceFolderHost"]) } } - // far raw data add PI if missing + // for raw data add PI if missing if val, ok := metaDataMap["type"]; ok { dstype := val.(string) if dstype == "raw" { diff --git a/datasetIngestor/checkMetadata_test.go b/datasetIngestor/checkMetadata_test.go index 22c0a35..d48b92d 100644 --- a/datasetIngestor/checkMetadata_test.go +++ b/datasetIngestor/checkMetadata_test.go @@ -10,13 +10,17 @@ import ( "os" ) +// TestGetHost is a test function for the getHost function. func TestGetHost(t *testing.T) { + // Call the function under test. host := getHost() + // fail the test and report an error if the returned hostname is an empty string. if len(host) == 0 { t.Errorf("getHost() returned an empty string") } + // fail the test and report an error if the returned hostname is "unknown". if host == "unknown" { t.Errorf("getHost() was unable to get the hostname") } diff --git a/datasetIngestor/testdata/metadata-short.json b/datasetIngestor/testdata/metadata-short.json new file mode 100644 index 0000000..138963a --- /dev/null +++ b/datasetIngestor/testdata/metadata-short.json @@ -0,0 +1,8 @@ +{ + "principalInvestigator":"egon.meier@psi.ch", + "creationLocation":"/PSI/SLS/CSAXS", + "sourceFolder": "/tmp/gnome", + "owner": "Andreas Menzel", + "type": "raw", + "ownerGroup": "p17880" +} diff --git a/datasetIngestor/testdata/metadata.json b/datasetIngestor/testdata/metadata.json new file mode 100644 index 0000000..3f69f39 --- /dev/null +++ b/datasetIngestor/testdata/metadata.json @@ -0,0 +1,20 @@ +{ + "creationLocation": "/PSI/SLS/CSAXS", + "datasetName": "CMakeCache", + "description": "", + "owner": "Ana Diaz", + "ownerEmail": "ana.diaz@psi.ch", + "ownerGroup": "p17301", + "principalInvestigator": "ana.diaz@psi.ch", + "scientificMetadata": [ + { + "sample": { + "description": "", + "name": "", + "principalInvestigator": "" + } + } + ], + "sourceFolder": "/usr/share/gnome", + "type": "raw" +} From 38b5d63f91ba751a04610a5aa5e2a7ca3707f811 Mon Sep 17 00:00:00 2001 From: "Ali R. Vahdati" <3798865+kavir1698@users.noreply.github.com> Date: Fri, 22 Mar 2024 15:40:05 +0100 Subject: [PATCH 3/9] Make the tests pass. --- datasetIngestor/checkMetadata_test.go | 90 ++++++++------------------ datasetIngestor/export_test.go | 4 ++ datasetIngestor/testdata/metadata.json | 2 +- go.mod | 1 + go.sum | 3 +- 5 files changed, 34 insertions(+), 66 deletions(-) create mode 100644 datasetIngestor/export_test.go diff --git a/datasetIngestor/checkMetadata_test.go b/datasetIngestor/checkMetadata_test.go index d48b92d..0e8ee9f 100644 --- a/datasetIngestor/checkMetadata_test.go +++ b/datasetIngestor/checkMetadata_test.go @@ -1,19 +1,15 @@ package datasetIngestor import ( - "encoding/json" - "io/ioutil" - "io" "net/http" - "strings" "testing" - "os" + "time" + "reflect" ) -// TestGetHost is a test function for the getHost function. func TestGetHost(t *testing.T) { // Call the function under test. - host := getHost() + host := GetHost() // fail the test and report an error if the returned hostname is an empty string. if len(host) == 0 { @@ -27,80 +23,46 @@ func TestGetHost(t *testing.T) { } func TestCheckMetadata(t *testing.T) { + // Define mock parameters for the function + var TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3" // "https://example.com/api" + var APIServer = TEST_API_SERVER + var metadatafile1 = "testdata/metadata.json" + // var metadatafile2 = "testdata/metadata-short.json" + // Mock HTTP client - mockClient := &http.Client{ - Transport: RoundTripFunc(func(req *http.Request) *http.Response { - // Prepare a mock response for the HTTP client - return &http.Response{ - StatusCode: 200, - Body: io.NopCloser(strings.NewReader(`{"valid":true}`)), - Header: make(http.Header), + client := &http.Client{ + Timeout: 5 * time.Second, // Set a timeout for requests + Transport: &http.Transport{ + // Customize the transport settings if needed (e.g., proxy, TLS config) + // For a dummy client, default settings are usually sufficient + }, + CheckRedirect: func(req *http.Request, via []*http.Request) error { + // Customize how redirects are handled if needed + // For a dummy client, default behavior is usually sufficient + return http.ErrUseLastResponse // Use the last response for redirects + }, } - }), - } // Mock user map - mockUser := map[string]string{ - "displayName": "testuser", + user := map[string]string{ + "displayName": "csaxsswissfel", "mail": "testuser@example.com", } // Mock access groups - mockAccessGroups := []string{"group1", "group2"} - - // Mock metadata file content - mockMetadata := map[string]interface{}{ - "type": "raw", - // Add other required fields as needed for testing - } - - // Convert metadata to JSON - mockMetadataJSON, err := json.Marshal(mockMetadata) - if err != nil { - t.Fatalf("Error marshaling mock metadata: %v", err) - } - - // Create a temporary file for mock metadata - tmpfile, err := ioutil.TempFile("", "mockmetadata.json") - if err != nil { - t.Fatalf("Error creating temporary file: %v", err) - } - defer tmpfile.Close() - defer func() { - // Clean up temporary file - if err := tmpfile.Close(); err != nil { - t.Fatalf("Error closing temporary file: %v", err) - } - if err := os.Remove(tmpfile.Name()); err != nil { - t.Fatalf("Error removing temporary file: %v", err) - } - }() - - // Write mock metadata JSON to the temporary file - if _, err := tmpfile.Write(mockMetadataJSON); err != nil { - t.Fatalf("Error writing mock metadata to temporary file: %v", err) - } + accessGroups := []string{"group1", "p17301"} // Call the function with mock parameters - metaDataMap, sourceFolder, beamlineAccount := CheckMetadata(mockClient, "http://example.com/api", tmpfile.Name(), mockUser, mockAccessGroups) + metaDataMap, sourceFolder, beamlineAccount := CheckMetadata(client, APIServer, metadatafile1, user, accessGroups) // Add assertions here based on the expected behavior of the function - // For example: if len(metaDataMap) == 0 { t.Error("Expected non-empty metadata map") } if sourceFolder == "" { t.Error("Expected non-empty source folder") } - if !beamlineAccount { - t.Error("Expected beamline account to be true") + if reflect.TypeOf(beamlineAccount).Kind() != reflect.Bool { + t.Error("Expected beamlineAccount to be boolean") } } - -// RoundTripFunc type is a custom implementation of http.RoundTripper -type RoundTripFunc func(req *http.Request) *http.Response - -// RoundTrip executes a single HTTP transaction, returning a Response for the provided Request. -func (f RoundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) { - return f(req), nil -} diff --git a/datasetIngestor/export_test.go b/datasetIngestor/export_test.go new file mode 100644 index 0000000..760dadd --- /dev/null +++ b/datasetIngestor/export_test.go @@ -0,0 +1,4 @@ +// This file exports internal functions for testing purposes. Since the name of the file ends with "_test.go", it will not be included in the final build of the application. +package datasetIngestor + +var GetHost = getHost \ No newline at end of file diff --git a/datasetIngestor/testdata/metadata.json b/datasetIngestor/testdata/metadata.json index 3f69f39..100db21 100644 --- a/datasetIngestor/testdata/metadata.json +++ b/datasetIngestor/testdata/metadata.json @@ -1,5 +1,5 @@ { - "creationLocation": "/PSI/SLS/CSAXS", + "creationLocation": "/PSI/SLS/CSAXS/SWISSFEL", "datasetName": "CMakeCache", "description": "", "owner": "Ana Diaz", diff --git a/go.mod b/go.mod index 61040a7..3eeddb8 100644 --- a/go.mod +++ b/go.mod @@ -13,6 +13,7 @@ require ( require ( github.com/creack/pty v1.1.17 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect github.com/mattn/go-colorable v0.1.9 // indirect github.com/mattn/go-isatty v0.0.14 // indirect golang.org/x/sys v0.2.0 // indirect diff --git a/go.sum b/go.sum index 00500b7..4887fa9 100644 --- a/go.sum +++ b/go.sum @@ -2,8 +2,9 @@ github.com/Netflix/go-expect v0.0.0-20220104043353-73e0943537d2 h1:+vx7roKuyA63n github.com/Netflix/go-expect v0.0.0-20220104043353-73e0943537d2/go.mod h1:HBCaDeC1lPdgDeDbhX8XFpy1jqjK0IBG8W5K+xYqA0w= github.com/creack/pty v1.1.17 h1:QeVUsEDNrLBW4tMgZHvxy18sKtr6VI492kBhUfhDJNI= github.com/creack/pty v1.1.17/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= -github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/fatih/color v1.13.0 h1:8LOYc1KYPPmyKMuN8QV2DNRWNbLo6LZ0iLs8+mlH53w= github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs= From 3c7a584def4f406a306a2981611d18c8f081d127 Mon Sep 17 00:00:00 2001 From: "Ali R. Vahdati" <3798865+kavir1698@users.noreply.github.com> Date: Fri, 22 Mar 2024 16:06:01 +0100 Subject: [PATCH 4/9] Add more tests --- datasetIngestor/checkMetadata_test.go | 54 +++++++++++++++++++- datasetIngestor/export_test.go | 2 +- datasetIngestor/testdata/metadata-short.json | 2 +- 3 files changed, 54 insertions(+), 4 deletions(-) diff --git a/datasetIngestor/checkMetadata_test.go b/datasetIngestor/checkMetadata_test.go index 0e8ee9f..955b5cd 100644 --- a/datasetIngestor/checkMetadata_test.go +++ b/datasetIngestor/checkMetadata_test.go @@ -27,7 +27,7 @@ func TestCheckMetadata(t *testing.T) { var TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3" // "https://example.com/api" var APIServer = TEST_API_SERVER var metadatafile1 = "testdata/metadata.json" - // var metadatafile2 = "testdata/metadata-short.json" + var metadatafile2 = "testdata/metadata-short.json" // Mock HTTP client client := &http.Client{ @@ -50,7 +50,7 @@ func TestCheckMetadata(t *testing.T) { } // Mock access groups - accessGroups := []string{"group1", "p17301"} + accessGroups := []string{"p17880", "p17301"} // Call the function with mock parameters metaDataMap, sourceFolder, beamlineAccount := CheckMetadata(client, APIServer, metadatafile1, user, accessGroups) @@ -62,7 +62,57 @@ func TestCheckMetadata(t *testing.T) { if sourceFolder == "" { t.Error("Expected non-empty source folder") } + if sourceFolder != "/usr/share/gnome" { + t.Error("sourceFolder should be '/usr/share/gnome'") + } if reflect.TypeOf(beamlineAccount).Kind() != reflect.Bool { t.Error("Expected beamlineAccount to be boolean") } + if beamlineAccount != false { + t.Error("Expected beamlineAccount to be false") + } + if _, ok := metaDataMap["ownerEmail"]; !ok { + t.Error("metaDataMap missing required key 'ownerEmail'") + } + if _, ok := metaDataMap["principalInvestigator"]; !ok { + t.Error("metaDataMap missing required key 'principalInvestigator'") + } + if _, ok := metaDataMap["scientificMetadata"]; !ok { + t.Error("metaDataMap missing required key 'scientificMetadata'") + } + scientificMetadata, ok := metaDataMap["scientificMetadata"].([]interface{}) + if ok { + firstEntry := scientificMetadata[0].(map[string]interface{}) + sample, ok := firstEntry["sample"].(map[string]interface{}) + if ok { + if _, ok := sample["name"]; !ok { + t.Error("Sample is missing 'name' field") + } + if _, ok := sample["description"]; !ok { + t.Error("Sample is missing 'description' field") + } + } + } else { + t.Error("scientificMetadata is not a list") + } + + // test with the second metadata file + metaDataMap2, sourceFolder2, beamlineAccount2 := CheckMetadata(client, APIServer, metadatafile2, user, accessGroups) + + // Add assertions here based on the expected behavior of the function + if len(metaDataMap2) == 0 { + t.Error("Expected non-empty metadata map") + } + if sourceFolder2 == "" { + t.Error("Expected non-empty source folder") + } + if sourceFolder2 != "/tmp/gnome" { + t.Error("sourceFolder should be '/tmp/gnome'") + } + if reflect.TypeOf(beamlineAccount2).Kind() != reflect.Bool { + t.Error("Expected beamlineAccount to be boolean") + } + if beamlineAccount2 != false { + t.Error("Expected beamlineAccount to be false") + } } diff --git a/datasetIngestor/export_test.go b/datasetIngestor/export_test.go index 760dadd..0b97c80 100644 --- a/datasetIngestor/export_test.go +++ b/datasetIngestor/export_test.go @@ -1,4 +1,4 @@ // This file exports internal functions for testing purposes. Since the name of the file ends with "_test.go", it will not be included in the final build of the application. package datasetIngestor -var GetHost = getHost \ No newline at end of file +var GetHost = getHost diff --git a/datasetIngestor/testdata/metadata-short.json b/datasetIngestor/testdata/metadata-short.json index 138963a..06b802c 100644 --- a/datasetIngestor/testdata/metadata-short.json +++ b/datasetIngestor/testdata/metadata-short.json @@ -1,6 +1,6 @@ { "principalInvestigator":"egon.meier@psi.ch", - "creationLocation":"/PSI/SLS/CSAXS", + "creationLocation":"/PSI/SLS/CSAXS/SWISSFEL", "sourceFolder": "/tmp/gnome", "owner": "Andreas Menzel", "type": "raw", From 0793a80738dba73e1ad0afcc430afdfb19530d41 Mon Sep 17 00:00:00 2001 From: "Ali R. Vahdati" <3798865+kavir1698@users.noreply.github.com> Date: Fri, 22 Mar 2024 22:34:26 +0100 Subject: [PATCH 5/9] Check illegal characters --- datasetIngestor/checkMetadata.go | 45 ++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/datasetIngestor/checkMetadata.go b/datasetIngestor/checkMetadata.go index d9fc553..003a72d 100644 --- a/datasetIngestor/checkMetadata.go +++ b/datasetIngestor/checkMetadata.go @@ -64,6 +64,11 @@ func CheckMetadata(client *http.Client, APIServer string, metadatafile string, u metaDataMap = metadataObj.(map[string]interface{}) beamlineAccount = false + // Check scientificMetadata for illegal keys + if checkIllegalKeys(metaDataMap) { + log.Fatal("Metadata contains keys with illegal characters (., [], or <>).") + } + if user["displayName"] != "ingestor" { if ownerGroup, ok := metaDataMap["ownerGroup"]; ok { validOwner := false @@ -259,3 +264,43 @@ func CheckMetadata(client *http.Client, APIServer string, metadatafile string, u return metaDataMap, sourceFolder, beamlineAccount } + +// checkIllegalKeys checks whether the metadata contains illegal keys +func checkIllegalKeys(metadata map[string]interface{}) bool { + for _, item := range metadata { + // Use a type switch to check if the item is a slice of interface{} type, e.g. the scientificMetadata key. + switch v := item.(type) { + case []interface{}: + // If the item is a slice of interface{}, iterate over each subItem in the slice. + for _, subItem := range v { + if subMap, ok := subItem.(map[string]interface{}); ok { + if containsIllegalKeys(subMap) { + return true + } + } + } + } + } + return false +} + +// containsIllegalKeys checks whether the key contains illegal characters +func containsIllegalKeys(m map[string]interface{}) bool { + for k := range m { + if containsIllegalCharacters(k) { + return true + } + } + return false +} + +func containsIllegalCharacters(s string) bool { + // Check if the string contains periods, brackets, or other illegal characters + // You can adjust this condition based on your specific requirements + for _, char := range s { + if char == '.' || char == '[' || char == ']' || char == '<' || char == '>' { + return true + } + } + return false +} \ No newline at end of file From d71a203f14f9ed7538e06e19b735c7f22964231d Mon Sep 17 00:00:00 2001 From: "Ali R. Vahdati" Date: Mon, 25 Mar 2024 12:28:14 +0100 Subject: [PATCH 6/9] Fix the tests --- datasetIngestor/checkMetadata.go | 54 +++--- datasetIngestor/checkMetadata_test.go | 159 ++++++++++++++++++ datasetIngestor/testdata/metadata-short.json | 8 + datasetIngestor/testdata/metadata.json | 20 +++ .../testdata/metadata_illegal.json | 20 +++ 5 files changed, 235 insertions(+), 26 deletions(-) create mode 100644 datasetIngestor/checkMetadata_test.go create mode 100644 datasetIngestor/testdata/metadata-short.json create mode 100644 datasetIngestor/testdata/metadata.json create mode 100644 datasetIngestor/testdata/metadata_illegal.json diff --git a/datasetIngestor/checkMetadata.go b/datasetIngestor/checkMetadata.go index 003a72d..a06742f 100644 --- a/datasetIngestor/checkMetadata.go +++ b/datasetIngestor/checkMetadata.go @@ -2,8 +2,8 @@ package datasetIngestor import ( "bytes" - "github.com/paulscherrerinstitute/scicat/datasetUtils" "encoding/json" + "github.com/paulscherrerinstitute/scicat/datasetUtils" "io/ioutil" "log" "net" @@ -60,17 +60,19 @@ func CheckMetadata(client *http.Client, APIServer string, metadatafile string, u if err != nil { log.Fatal(err) } - // use type assertion to access f's underlying map - metaDataMap = metadataObj.(map[string]interface{}) + + // Use type assertion to convert the interface{} object to a map[string]interface{}. + metaDataMap = metadataObj.(map[string]interface{}) // `.(` is type assertion: a way to extract the underlying value of an interface and check whether it's of a specific type. beamlineAccount = false - // Check scientificMetadata for illegal keys + // Check scientificMetadata for illegal keys if checkIllegalKeys(metaDataMap) { - log.Fatal("Metadata contains keys with illegal characters (., [], or <>).") + panic("Metadata contains keys with illegal characters (., [], $, or <>).") } if user["displayName"] != "ingestor" { - if ownerGroup, ok := metaDataMap["ownerGroup"]; ok { + // Check if the metadata contains the "ownerGroup" key. + if ownerGroup, ok := metaDataMap["ownerGroup"]; ok { // type assertion with a comma-ok idiom validOwner := false for _, b := range accessGroups { if b == ownerGroup { @@ -219,6 +221,9 @@ func CheckMetadata(client *http.Client, APIServer string, metadatafile string, u //fmt.Printf("Marshalled meta data : %s\n", string(bmm)) // now check validity req, err := http.NewRequest("POST", myurl, bytes.NewBuffer(bmm)) + if err != nil { + log.Fatal(err) + } req.Header.Set("Content-Type", "application/json") resp, err := client.Do(req) @@ -265,18 +270,25 @@ func CheckMetadata(client *http.Client, APIServer string, metadatafile string, u } -// checkIllegalKeys checks whether the metadata contains illegal keys func checkIllegalKeys(metadata map[string]interface{}) bool { - for _, item := range metadata { - // Use a type switch to check if the item is a slice of interface{} type, e.g. the scientificMetadata key. - switch v := item.(type) { + for key, value := range metadata { + if containsIllegalCharacters(key) { + return true + } + + switch v := value.(type) { + case map[string]interface{}: + if checkIllegalKeys(v) { + return true + } case []interface{}: - // If the item is a slice of interface{}, iterate over each subItem in the slice. - for _, subItem := range v { - if subMap, ok := subItem.(map[string]interface{}); ok { - if containsIllegalKeys(subMap) { + for _, item := range v { + switch itemValue := item.(type) { // Type switch on array item + case map[string]interface{}: + if checkIllegalKeys(itemValue) { return true } + // Add other cases if needed } } } @@ -284,23 +296,13 @@ func checkIllegalKeys(metadata map[string]interface{}) bool { return false } -// containsIllegalKeys checks whether the key contains illegal characters -func containsIllegalKeys(m map[string]interface{}) bool { - for k := range m { - if containsIllegalCharacters(k) { - return true - } - } - return false -} - func containsIllegalCharacters(s string) bool { // Check if the string contains periods, brackets, or other illegal characters // You can adjust this condition based on your specific requirements for _, char := range s { - if char == '.' || char == '[' || char == ']' || char == '<' || char == '>' { + if char == '.' || char == '[' || char == ']' || char == '<' || char == '>' || char == '$' { return true } } return false -} \ No newline at end of file +} diff --git a/datasetIngestor/checkMetadata_test.go b/datasetIngestor/checkMetadata_test.go new file mode 100644 index 0000000..5e86d8d --- /dev/null +++ b/datasetIngestor/checkMetadata_test.go @@ -0,0 +1,159 @@ +package datasetIngestor + +import ( + "net/http" + "reflect" + "testing" + "time" +) + +func TestGetHost(t *testing.T) { + // Call the function under test. + host := GetHost() + + // fail the test and report an error if the returned hostname is an empty string. + if len(host) == 0 { + t.Errorf("getHost() returned an empty string") + } + + // fail the test and report an error if the returned hostname is "unknown". + if host == "unknown" { + t.Errorf("getHost() was unable to get the hostname") + } +} + +func TestCheckMetadata(t *testing.T) { + // Define mock parameters for the function + var TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3" // "https://example.com/api" + var APIServer = TEST_API_SERVER + var metadatafile1 = "testdata/metadata.json" + var metadatafile2 = "testdata/metadata-short.json" + // var metadatafile3 = "testdata/metadata_illegal.json" + + // Mock HTTP client + client := &http.Client{ + Timeout: 5 * time.Second, // Set a timeout for requests + Transport: &http.Transport{ + // Customize the transport settings if needed (e.g., proxy, TLS config) + // For a dummy client, default settings are usually sufficient + }, + CheckRedirect: func(req *http.Request, via []*http.Request) error { + // Customize how redirects are handled if needed + // For a dummy client, default behavior is usually sufficient + return http.ErrUseLastResponse // Use the last response for redirects + }, + } + + // Mock user map + user := map[string]string{ + "displayName": "csaxsswissfel", + "mail": "testuser@example.com", + } + + // Mock access groups + accessGroups := []string{"group1", "group2"} + + // Call the function with mock parameters + metaDataMap, sourceFolder, beamlineAccount := CheckMetadata(client, APIServer, metadatafile1, user, accessGroups) + + // Add assertions here based on the expected behavior of the function + if len(metaDataMap) == 0 { + t.Error("Expected non-empty metadata map") + } + if sourceFolder == "" { + t.Error("Expected non-empty source folder") + } + if sourceFolder != "/usr/share/gnome" { + t.Error("sourceFolder should be '/usr/share/gnome'") + } + if reflect.TypeOf(beamlineAccount).Kind() != reflect.Bool { + t.Error("Expected beamlineAccount to be boolean") + } + if beamlineAccount != false { + t.Error("Expected beamlineAccount to be false") + } + if _, ok := metaDataMap["ownerEmail"]; !ok { + t.Error("metaDataMap missing required key 'ownerEmail'") + } + if _, ok := metaDataMap["principalInvestigator"]; !ok { + t.Error("metaDataMap missing required key 'principalInvestigator'") + } + if _, ok := metaDataMap["scientificMetadata"]; !ok { + t.Error("metaDataMap missing required key 'scientificMetadata'") + } + scientificMetadata, ok := metaDataMap["scientificMetadata"].([]interface{}) + if ok { + firstEntry := scientificMetadata[0].(map[string]interface{}) + sample, ok := firstEntry["sample"].(map[string]interface{}) + if ok { + if _, ok := sample["name"]; !ok { + t.Error("Sample is missing 'name' field") + } + if _, ok := sample["description"]; !ok { + t.Error("Sample is missing 'description' field") + } + } + } else { + t.Error("scientificMetadata is not a list") + } + + // test with the second metadata file + metaDataMap2, sourceFolder2, beamlineAccount2 := CheckMetadata(client, APIServer, metadatafile2, user, accessGroups) + + // Add assertions here based on the expected behavior of the function + if len(metaDataMap2) == 0 { + t.Error("Expected non-empty metadata map") + } + if sourceFolder2 == "" { + t.Error("Expected non-empty source folder") + } + if sourceFolder2 != "/tmp/gnome" { + t.Error("sourceFolder should be '/tmp/gnome'") + } + if reflect.TypeOf(beamlineAccount2).Kind() != reflect.Bool { + t.Error("Expected beamlineAccount to be boolean") + } + if beamlineAccount2 != false { + t.Error("Expected beamlineAccount to be false") + } +} + +func TestCheckMetadata_CrashCase(t *testing.T) { + defer func() { + if recover() != nil { + t.Log("Function crashed as expected") + } else { + t.Fatal("Function did not crash as expected") + } + }() + + // Define mock parameters for the function + var TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3" // "https://example.com/api" + var APIServer = TEST_API_SERVER + var metadatafile3 = "testdata/metadata_illegal.json" + + // Mock HTTP client + client := &http.Client{ + Timeout: 5 * time.Second, // Set a timeout for requests + Transport: &http.Transport{ + // Customize the transport settings if needed (e.g., proxy, TLS config) + // For a dummy client, default settings are usually sufficient + }, + CheckRedirect: func(req *http.Request, via []*http.Request) error { + // Customize how redirects are handled if needed + // For a dummy client, default behavior is usually sufficient + return http.ErrUseLastResponse // Use the last response for redirects + }, + } + + // Mock user map + user := map[string]string{ + "displayName": "csaxsswissfel", + "mail": "testuser@example.com", + } + + // Mock access groups + accessGroups := []string{"group1", "group2"} + // Call the function that should crash + CheckMetadata(client, APIServer, metadatafile3, user, accessGroups) +} diff --git a/datasetIngestor/testdata/metadata-short.json b/datasetIngestor/testdata/metadata-short.json new file mode 100644 index 0000000..fbe4a5b --- /dev/null +++ b/datasetIngestor/testdata/metadata-short.json @@ -0,0 +1,8 @@ +{ + "principalInvestigator":"egon.meier@psi.ch", + "creationLocation":"/PSI/SLS/CSAXS/SWISSFEL", + "sourceFolder": "/tmp/gnome", + "owner": "first last", + "type": "raw", + "ownerGroup": "group1" +} diff --git a/datasetIngestor/testdata/metadata.json b/datasetIngestor/testdata/metadata.json new file mode 100644 index 0000000..303c22d --- /dev/null +++ b/datasetIngestor/testdata/metadata.json @@ -0,0 +1,20 @@ +{ + "creationLocation": "/PSI/SLS/CSAXS/SWISSFEL", + "datasetName": "CMakeCache", + "description": "", + "owner": "first last", + "ownerEmail": "test@example.com", + "ownerGroup": "group1", + "principalInvestigator": "test@example.com", + "scientificMetadata": [ + { + "sample": { + "description": "", + "name": "", + "principalInvestigator": "" + } + } + ], + "sourceFolder": "/usr/share/gnome", + "type": "raw" +} diff --git a/datasetIngestor/testdata/metadata_illegal.json b/datasetIngestor/testdata/metadata_illegal.json new file mode 100644 index 0000000..accae75 --- /dev/null +++ b/datasetIngestor/testdata/metadata_illegal.json @@ -0,0 +1,20 @@ +{ + "creationLocation": "/PSI/SLS/CSAXS", + "datasetName": "CMakeCache", + "description": "", + "owner": "first last", + "ownerEmail": "test@example.com", + "ownerGroup": "group1", + "principalInvestigator": "test@example.com", + "scientificMetadata": [ + { + "sample": { + "description.": "It has an illegal characters in the key", + "name]": "same", + "principalInvestigator": "" + } + } + ], + "sourceFolder": "/usr/share/gnome", + "type": "raw" +} From e7f35d6ed7dd520c23ac446048f01f17fecbcfc7 Mon Sep 17 00:00:00 2001 From: Ali Vahdati <3798865+kavir1698@users.noreply.github.com> Date: Wed, 27 Mar 2024 10:49:07 +0100 Subject: [PATCH 7/9] Add test for checkMetadata function (#4) * Add test - not passing * Add comments and testdata dir * Make the tests pass. * Add more tests * Mark a line for future improvement --- datasetIngestor/checkMetadata.go | 1 + datasetIngestor/checkMetadata_test.go | 13 ++++++------- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/datasetIngestor/checkMetadata.go b/datasetIngestor/checkMetadata.go index 75d2a02..9b297df 100644 --- a/datasetIngestor/checkMetadata.go +++ b/datasetIngestor/checkMetadata.go @@ -76,6 +76,7 @@ func CheckMetadata(client *http.Client, APIServer string, metadatafile string, u panic("Metadata contains keys with illegal characters (., [], $, or <>).") } + // If the user is not the ingestor, check whether any of the accessGroups equal the ownerGroup. Otherwise, check for beamline-specific accounts. if user["displayName"] != "ingestor" { // Check if the metadata contains the "ownerGroup" key. if ownerGroup, ok := metaDataMap["ownerGroup"]; ok { // type assertion with a comma-ok idiom diff --git a/datasetIngestor/checkMetadata_test.go b/datasetIngestor/checkMetadata_test.go index 5e86d8d..8c259ec 100644 --- a/datasetIngestor/checkMetadata_test.go +++ b/datasetIngestor/checkMetadata_test.go @@ -24,15 +24,14 @@ func TestGetHost(t *testing.T) { func TestCheckMetadata(t *testing.T) { // Define mock parameters for the function - var TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3" // "https://example.com/api" + var TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3" // TODO: Test Improvement. Change this to a mock server. At the moment, tests will fail if we change this to a mock server. var APIServer = TEST_API_SERVER var metadatafile1 = "testdata/metadata.json" var metadatafile2 = "testdata/metadata-short.json" - // var metadatafile3 = "testdata/metadata_illegal.json" // Mock HTTP client client := &http.Client{ - Timeout: 5 * time.Second, // Set a timeout for requests + Timeout: 5 * time.Second, // Set a timeout for requests Transport: &http.Transport{ // Customize the transport settings if needed (e.g., proxy, TLS config) // For a dummy client, default settings are usually sufficient @@ -73,13 +72,13 @@ func TestCheckMetadata(t *testing.T) { t.Error("Expected beamlineAccount to be false") } if _, ok := metaDataMap["ownerEmail"]; !ok { - t.Error("metaDataMap missing required key 'ownerEmail'") + t.Error("metaDataMap missing required key 'ownerEmail'") } if _, ok := metaDataMap["principalInvestigator"]; !ok { - t.Error("metaDataMap missing required key 'principalInvestigator'") + t.Error("metaDataMap missing required key 'principalInvestigator'") } if _, ok := metaDataMap["scientificMetadata"]; !ok { - t.Error("metaDataMap missing required key 'scientificMetadata'") + t.Error("metaDataMap missing required key 'scientificMetadata'") } scientificMetadata, ok := metaDataMap["scientificMetadata"].([]interface{}) if ok { @@ -128,7 +127,7 @@ func TestCheckMetadata_CrashCase(t *testing.T) { }() // Define mock parameters for the function - var TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3" // "https://example.com/api" + var TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3" var APIServer = TEST_API_SERVER var metadatafile3 = "testdata/metadata_illegal.json" From ab6c80346b6656ac13b503143d893cd06b49336e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 27 Mar 2024 10:55:13 +0100 Subject: [PATCH 8/9] Bump golang.org/x/crypto from 0.3.0 to 0.17.0 (#19) Bumps [golang.org/x/crypto](https://github.com/golang/crypto) from 0.3.0 to 0.17.0. - [Commits](https://github.com/golang/crypto/compare/v0.3.0...v0.17.0) --- updated-dependencies: - dependency-name: golang.org/x/crypto dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 6 +++--- go.sum | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/go.mod b/go.mod index 3eeddb8..ec1d588 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,7 @@ require ( github.com/fatih/color v1.13.0 github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 github.com/mcuadros/go-version v0.0.0-20190830083331-035f6764e8d2 - golang.org/x/crypto v0.3.0 + golang.org/x/crypto v0.17.0 gopkg.in/yaml.v2 v2.4.0 ) @@ -16,6 +16,6 @@ require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/mattn/go-colorable v0.1.9 // indirect github.com/mattn/go-isatty v0.0.14 // indirect - golang.org/x/sys v0.2.0 // indirect - golang.org/x/term v0.2.0 // indirect + golang.org/x/sys v0.15.0 // indirect + golang.org/x/term v0.15.0 // indirect ) diff --git a/go.sum b/go.sum index 4887fa9..f1b9ea5 100644 --- a/go.sum +++ b/go.sum @@ -21,15 +21,15 @@ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZN github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -golang.org/x/crypto v0.3.0 h1:a06MkbcxBrEFc0w0QIZWXrH/9cCX6KJyWbBOIwAn+7A= -golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= +golang.org/x/crypto v0.17.0 h1:r8bRNjWL3GshPW3gkd+RpvzWrZAwPS49OmTGZ/uhM4k= +golang.org/x/crypto v0.17.0/go.mod h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4= golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.2.0 h1:ljd4t30dBnAvMZaQCevtY0xLLD0A+bRZXbgLMLU1F/A= -golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/term v0.2.0 h1:z85xZCsEl7bi/KwbNADeBYoOP0++7W1ipu+aGnpwzRM= -golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= +golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc= +golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.15.0 h1:y/Oo/a/q3IXu26lQgl04j/gjuBDOBlx7X6Om1j2CPW4= +golang.org/x/term v0.15.0/go.mod h1:BDl952bC7+uMoWR75FIrCDx79TPU9oHkTZ9yRbYOrX0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= From a95d0cdd3826852ba8428a32c6085a75e45d7d51 Mon Sep 17 00:00:00 2001 From: "Ali R. Vahdati" <3798865+kavir1698@users.noreply.github.com> Date: Wed, 27 Mar 2024 11:46:38 +0100 Subject: [PATCH 9/9] Add dummy email --- datasetIngestor/testdata/metadata-short.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasetIngestor/testdata/metadata-short.json b/datasetIngestor/testdata/metadata-short.json index bf93309..4912d85 100644 --- a/datasetIngestor/testdata/metadata-short.json +++ b/datasetIngestor/testdata/metadata-short.json @@ -1,5 +1,5 @@ { - "principalInvestigator":"egon.meier@psi.ch", + "principalInvestigator":"piemail@example.ch", "creationLocation":"/PSI/SLS/CSAXS/SWISSFEL", "sourceFolder": "/tmp/gnome", "owner": "first last",