From c3854969edb43a196698982915cac87376d6d764 Mon Sep 17 00:00:00 2001 From: "Ali R. Vahdati" Date: Wed, 24 Apr 2024 11:11:04 +0200 Subject: [PATCH 1/8] Add tests --- datasetIngestor/sendIngestCommand.go | 36 ++++++++ datasetIngestor/sendIngestCommand_test.go | 105 ++++++++++++++++++++++ 2 files changed, 141 insertions(+) create mode 100644 datasetIngestor/sendIngestCommand_test.go diff --git a/datasetIngestor/sendIngestCommand.go b/datasetIngestor/sendIngestCommand.go index a5b40bd..22e4061 100644 --- a/datasetIngestor/sendIngestCommand.go +++ b/datasetIngestor/sendIngestCommand.go @@ -26,6 +26,18 @@ const TOTAL_MAXFILES = 400000 const BLOCK_MAXBYTES = 200000000000 // 700000 for testing the logic const BLOCK_MAXFILES = 20000 // 20 for testing the logic +/* createOrigBlock generates a `FileBlock` from a subset of a given `filesArray`. +It takes start and end indices to determine the subset, and a datasetId to associate with the FileBlock. +The function calculates the total size of all Datafiles in the subset and includes this in the FileBlock. + +Parameters: +start: The starting index of the subset in the filesArray. +end: The ending index of the subset in the filesArray. +filesArray: The array of Datafiles to create the FileBlock from. +datasetId: The id to associate with the FileBlock. + +Returns: +A FileBlock that includes the total size of the Datafiles in the subset, the subset of Datafiles, and the datasetId. */ func createOrigBlock(start int, end int, filesArray []Datafile, datasetId string) (fileblock FileBlock) { // accumulate sizes var totalSize int64 @@ -38,6 +50,30 @@ func createOrigBlock(start int, end int, filesArray []Datafile, datasetId string return FileBlock{Size: totalSize, DataFileList: filesArray[start:end], DatasetId: datasetId} } +/* +SendIngestCommand sends an ingest command to the API server to create a new dataset and associated data blocks. + +Parameters: +client: The HTTP client used to send the request. +APIServer: The URL of the API server. +metaDataMap: A map containing metadata for the dataset. +fullFileArray: An array of Datafile objects representing the files in the dataset. +user: A map containing user information, including the access token. + +The function first creates a new dataset by sending a POST request to the appropriate endpoint on the API server, +based on the dataset type specified in metaDataMap. The dataset type can be "raw", "derived", or "base". +If the dataset type is not one of these, the function logs a fatal error. + +The function then creates original data blocks for the dataset. It splits the dataset into blocks if the dataset +contains more than a certain number of files or if the total size of the files exceeds a certain limit. +Each block is created by calling the createOrigBlock function and then sending a POST request to the "/OrigDatablocks" +endpoint on the API server. + +If the total number of files in the dataset exceeds the maximum limit, the function logs a fatal error. + +Returns: +The ID of the created dataset. +*/ func SendIngestCommand(client *http.Client, APIServer string, metaDataMap map[string]interface{}, fullFileArray []Datafile, user map[string]string) (datasetId string) { // create dataset diff --git a/datasetIngestor/sendIngestCommand_test.go b/datasetIngestor/sendIngestCommand_test.go new file mode 100644 index 0000000..498d7cd --- /dev/null +++ b/datasetIngestor/sendIngestCommand_test.go @@ -0,0 +1,105 @@ +package datasetIngestor + +import ( + "net/http" + "net/http/httptest" + "testing" + "time" + "strings" +) + +func TestCreateOrigBlock(t *testing.T) { + // Define a slice of Datafile for testing + datafiles := []Datafile{ + {Size: 100}, + {Size: 200}, + {Size: 300}, + {Size: 400}, + } + + // Call createOrigBlock function + block := createOrigBlock(1, 3, datafiles, "test-dataset") + + // Check the Size of the returned FileBlock + if block.Size != 500 { + t.Errorf("Expected block size of 500, but got %d", block.Size) + } + + // Check the length of DataFileList in the returned FileBlock + if len(block.DataFileList) != 2 { + t.Errorf("Expected 2 datafiles in the block, but got %d", len(block.DataFileList)) + } + + // Check the DatasetId of the returned FileBlock + if block.DatasetId != "test-dataset" { + t.Errorf("Expected dataset id of 'test-dataset', but got %s", block.DatasetId) + } +} + +func TestSendIngestCommand(t *testing.T) { + // Mock HTTP client + client := &http.Client{ + Timeout: 5 * time.Second, // Set a timeout for requests + Transport: &http.Transport{ + // Customize the transport settings if needed (e.g., proxy, TLS config) + // For a dummy client, default settings are usually sufficient + }, + CheckRedirect: func(req *http.Request, via []*http.Request) error { + // Customize how redirects are handled if needed + // For a dummy client, default behavior is usually sufficient + return http.ErrUseLastResponse // Use the last response for redirects + }, + } + + // Mock user map + user := map[string]string{ + "displayName": "csaxsswissfel", + "mail": "testuser@example.com", + "accessToken": "test-access-token", + } + +// Mock metaDataMap + metaDataMap := map[string]interface{}{ + "accessGroups": []string{}, + "contactEmail": "testuser@example.com", + "creationLocation": "/PSI/", + "creationTime": "2300-01-01T11:11:11.000Z", + "datasetName": "CMakeCache", + "description": "", + "endTime": "2300-01-01T11:11:11.000Z", + "owner": "first last", + "ownerEmail": "test@example.com", + "ownerGroup": "group1", + "principalInvestigator": "test@example.com", + "scientificMetadata": []map[string]map[string]string{{"sample": {"description": "", "name": "", "principalInvestigator": ""}}}, + "sourceFolder": "/usr/share/gnome", + "sourceFolderHost": "PC162.psi.ch", + "type": "raw", + } + // Mock datafiles + datafiles := []Datafile{ + {Size: 100}, + {Size: 200}, + {Size: 300}, + {Size: 400}, + } + + // Create a mock server + server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) { + // Respond with a fixed dataset ID when a new dataset is created + if strings.HasPrefix(req.URL.Path, "/RawDatasets") || strings.HasPrefix(req.URL.Path, "/DerivedDatasets") || strings.HasPrefix(req.URL.Path, "/Datasets") { + rw.Write([]byte(`{"pid": "test-dataset-id"}`)) + } else { + // Respond with a 200 status code when a new data block is created + rw.WriteHeader(http.StatusOK) + } + })) + // Close the server when test finishes + defer server.Close() + + // Call SendIngestCommand function with the mock server's URL and check the returned dataset ID + datasetId := SendIngestCommand(client, server.URL, metaDataMap, datafiles, user) + if datasetId != "test-dataset-id" { + t.Errorf("Expected dataset id 'test-dataset-id', but got '%s'", datasetId) + } +} From 61024b9cc7815db747098bccfd3a7af94a14ce2b Mon Sep 17 00:00:00 2001 From: "Ali R. Vahdati" Date: Wed, 24 Apr 2024 11:29:50 +0200 Subject: [PATCH 2/8] Refactor `sendIngestCommand` --- datasetIngestor/sendIngestCommand.go | 140 +++++++++++----------- datasetIngestor/sendIngestCommand_test.go | 3 +- 2 files changed, 75 insertions(+), 68 deletions(-) diff --git a/datasetIngestor/sendIngestCommand.go b/datasetIngestor/sendIngestCommand.go index 22e4061..41c46d1 100644 --- a/datasetIngestor/sendIngestCommand.go +++ b/datasetIngestor/sendIngestCommand.go @@ -46,7 +46,7 @@ func createOrigBlock(start int, end int, filesArray []Datafile, datasetId string totalSize += filesArray[i].Size } // fmt.Printf("Start:%v, end:%v, totalsize:%v\n, first entry:%v\n", start, end, totalSize, filesArray[start]) - + return FileBlock{Size: totalSize, DataFileList: filesArray[start:end], DatasetId: datasetId} } @@ -76,106 +76,112 @@ The ID of the created dataset. */ func SendIngestCommand(client *http.Client, APIServer string, metaDataMap map[string]interface{}, fullFileArray []Datafile, user map[string]string) (datasetId string) { - // create dataset + + datasetId = createDataset(client, APIServer, metaDataMap, user) + createOrigDatablocks(client, APIServer, fullFileArray, datasetId, user) + + return datasetId +} +func createDataset(client *http.Client, APIServer string, metaDataMap map[string]interface{}, user map[string]string) string { cmm, _ := json.Marshal(metaDataMap) - // metadataString := string(cmm) - datasetId = "" - + datasetId := "" + if val, ok := metaDataMap["type"]; ok { dstype := val.(string) - endpoint := "" - if dstype == "raw" { - endpoint = "/RawDatasets" - } else if dstype == "derived" { - endpoint = "/DerivedDatasets" - } else if dstype == "base" { - endpoint = "/Datasets" - } else { - log.Fatal("Unknown dataset type encountered:", dstype) - } + endpoint := getEndpoint(dstype) myurl := APIServer + endpoint + "/?access_token=" + user["accessToken"] - req, err := http.NewRequest("POST", myurl, bytes.NewBuffer(cmm)) - req.Header.Set("Content-Type", "application/json") - - // fmt.Printf("request to message broker:%v\n", req) - - resp, err := client.Do(req) - if err != nil { - log.Fatal(err) - } + resp := sendRequest(client, "POST", myurl, cmm) defer resp.Body.Close() - - // log.Println("response Status:", resp.Status) - // fmt.Println("response Headers:", resp.Header) + if resp.StatusCode == 200 { - // important: use capital first character in field names! - type PidType struct { - Pid string `json:"pid"` - } - decoder := json.NewDecoder(resp.Body) - var d PidType - err := decoder.Decode(&d) - if err != nil { - log.Fatal("Could not decode pid from dataset entry:", err) - } - datasetId = d.Pid + datasetId = decodePid(resp) log.Printf("Created dataset with id %v", datasetId) } else { log.Fatalf("SendIngestCommand: Failed to create new dataset: status code %v\n", resp.StatusCode) - } + } } else { log.Fatalf("No dataset type defined for dataset %v\n", metaDataMap) } - - // create OrigDatablocks - // split datasets with many files into blocks, which limit number of files and size per block - + + return datasetId +} + +func getEndpoint(dstype string) string { + switch dstype { + case "raw": + return "/RawDatasets" + case "derived": + return "/DerivedDatasets" + case "base": + return "/Datasets" + default: + log.Fatal("Unknown dataset type encountered:", dstype) + return "" + } +} + +func sendRequest(client *http.Client, method, url string, body []byte) *http.Response { + req, err := http.NewRequest(method, url, bytes.NewBuffer(body)) + if err != nil { + log.Fatal(err) + } + req.Header.Set("Content-Type", "application/json") + + resp, err := client.Do(req) + if err != nil { + log.Fatal(err) + } + + return resp +} + +func decodePid(resp *http.Response) string { + type PidType struct { + Pid string `json:"pid"` + } + decoder := json.NewDecoder(resp.Body) + var d PidType + err := decoder.Decode(&d) + if err != nil { + log.Fatal("Could not decode pid from dataset entry:", err) + } + + return d.Pid +} + +func createOrigDatablocks(client *http.Client, APIServer string, fullFileArray []Datafile, datasetId string, user map[string]string) { totalFiles := len(fullFileArray) - + if totalFiles > TOTAL_MAXFILES { log.Fatalf( "This datasets exceeds (%v) the maximum number of files per dataset , which can currently be handled by the archiving system (%v)\n", totalFiles, TOTAL_MAXFILES) } - + log.Printf("The dataset contains %v files. \n", totalFiles) - + end := 0 var blockBytes int64 for start := 0; end < totalFiles; { - // loop over treated files until one of the limits is reached blockBytes = 0 - + for end = start; end-start < BLOCK_MAXFILES && blockBytes < BLOCK_MAXBYTES && end < totalFiles; { blockBytes += fullFileArray[end].Size end++ - // log.Println("Inside inner loop:", start, end, blockBytes) } origBlock := createOrigBlock(start, end, fullFileArray, datasetId) - - payloadString, err := json.Marshal(origBlock) - // log.Printf("Payload for block:%s\n", payloadString) + + payloadString, _ := json.Marshal(origBlock) myurl := APIServer + "/OrigDatablocks" + "?access_token=" + user["accessToken"] - req, err := http.NewRequest("POST", myurl, bytes.NewBuffer(payloadString)) - req.Header.Set("Content-Type", "application/json") - //fmt.Printf("request to message broker:%v\n", req) - resp, err := client.Do(req) - if err != nil { - log.Fatal(err) - } + resp := sendRequest(client, "POST", myurl, payloadString) + defer resp.Body.Close() + if resp.StatusCode != 200 { log.Fatalf("Unexpected response code %v when adding origDatablock for dataset id:%v", resp.Status, datasetId) } - defer resp.Body.Close() - - // log.Println("response Status:", resp.Status) - //fmt.Println("response Headers:", resp.Header) - // body, _ := ioutil.ReadAll(resp.Body) - // log.Println("Message response Body:", string(body)[0]) + log.Printf("Created file block from file %v to %v with total size of %v bytes and %v files \n", start, end-1, blockBytes, end-start) start = end } - - return datasetId } diff --git a/datasetIngestor/sendIngestCommand_test.go b/datasetIngestor/sendIngestCommand_test.go index 498d7cd..299c713 100644 --- a/datasetIngestor/sendIngestCommand_test.go +++ b/datasetIngestor/sendIngestCommand_test.go @@ -58,7 +58,7 @@ func TestSendIngestCommand(t *testing.T) { "accessToken": "test-access-token", } -// Mock metaDataMap + // Mock metaDataMap metaDataMap := map[string]interface{}{ "accessGroups": []string{}, "contactEmail": "testuser@example.com", @@ -76,6 +76,7 @@ func TestSendIngestCommand(t *testing.T) { "sourceFolderHost": "PC162.psi.ch", "type": "raw", } + // Mock datafiles datafiles := []Datafile{ {Size: 100}, From b6a6290eff1c406f2164567ad2b5eacd5ace0ee1 Mon Sep 17 00:00:00 2001 From: "Ali R. Vahdati" Date: Wed, 24 Apr 2024 13:10:53 +0200 Subject: [PATCH 3/8] Refactor and add more tests --- datasetIngestor/sendIngestCommand.go | 17 +-- datasetIngestor/sendIngestCommand_test.go | 145 ++++++++++++++++------ 2 files changed, 118 insertions(+), 44 deletions(-) diff --git a/datasetIngestor/sendIngestCommand.go b/datasetIngestor/sendIngestCommand.go index 41c46d1..009d670 100644 --- a/datasetIngestor/sendIngestCommand.go +++ b/datasetIngestor/sendIngestCommand.go @@ -5,6 +5,7 @@ import ( "encoding/json" "log" "net/http" + "fmt" ) type FileBlock struct { @@ -89,7 +90,10 @@ func createDataset(client *http.Client, APIServer string, metaDataMap map[string if val, ok := metaDataMap["type"]; ok { dstype := val.(string) - endpoint := getEndpoint(dstype) + endpoint, err := getEndpoint(dstype) + if err != nil { + log.Fatal(err) + } myurl := APIServer + endpoint + "/?access_token=" + user["accessToken"] resp := sendRequest(client, "POST", myurl, cmm) defer resp.Body.Close() @@ -107,17 +111,16 @@ func createDataset(client *http.Client, APIServer string, metaDataMap map[string return datasetId } -func getEndpoint(dstype string) string { +func getEndpoint(dstype string) (string, error) { switch dstype { case "raw": - return "/RawDatasets" + return "/RawDatasets", nil case "derived": - return "/DerivedDatasets" + return "/DerivedDatasets", nil case "base": - return "/Datasets" + return "/Datasets", nil default: - log.Fatal("Unknown dataset type encountered:", dstype) - return "" + return "", fmt.Errorf("Unknown dataset type encountered: %s", dstype) } } diff --git a/datasetIngestor/sendIngestCommand_test.go b/datasetIngestor/sendIngestCommand_test.go index 299c713..7c65d08 100644 --- a/datasetIngestor/sendIngestCommand_test.go +++ b/datasetIngestor/sendIngestCommand_test.go @@ -1,11 +1,15 @@ package datasetIngestor import ( + "bytes" + "log" "net/http" "net/http/httptest" + "os" + "strings" "testing" "time" - "strings" + "io" ) func TestCreateOrigBlock(t *testing.T) { @@ -16,20 +20,20 @@ func TestCreateOrigBlock(t *testing.T) { {Size: 300}, {Size: 400}, } - + // Call createOrigBlock function block := createOrigBlock(1, 3, datafiles, "test-dataset") - + // Check the Size of the returned FileBlock if block.Size != 500 { t.Errorf("Expected block size of 500, but got %d", block.Size) } - + // Check the length of DataFileList in the returned FileBlock if len(block.DataFileList) != 2 { t.Errorf("Expected 2 datafiles in the block, but got %d", len(block.DataFileList)) } - + // Check the DatasetId of the returned FileBlock if block.DatasetId != "test-dataset" { t.Errorf("Expected dataset id of 'test-dataset', but got %s", block.DatasetId) @@ -39,7 +43,7 @@ func TestCreateOrigBlock(t *testing.T) { func TestSendIngestCommand(t *testing.T) { // Mock HTTP client client := &http.Client{ - Timeout: 5 * time.Second, // Set a timeout for requests + Timeout: 5 * time.Second, // Set a timeout for requests Transport: &http.Transport{ // Customize the transport settings if needed (e.g., proxy, TLS config) // For a dummy client, default settings are usually sufficient @@ -50,33 +54,33 @@ func TestSendIngestCommand(t *testing.T) { return http.ErrUseLastResponse // Use the last response for redirects }, } - + // Mock user map user := map[string]string{ - "displayName": "csaxsswissfel", - "mail": "testuser@example.com", - "accessToken": "test-access-token", - } - + "displayName": "csaxsswissfel", + "mail": "testuser@example.com", + "accessToken": "test-access-token", + } + // Mock metaDataMap metaDataMap := map[string]interface{}{ - "accessGroups": []string{}, - "contactEmail": "testuser@example.com", - "creationLocation": "/PSI/", - "creationTime": "2300-01-01T11:11:11.000Z", - "datasetName": "CMakeCache", - "description": "", - "endTime": "2300-01-01T11:11:11.000Z", - "owner": "first last", - "ownerEmail": "test@example.com", - "ownerGroup": "group1", - "principalInvestigator": "test@example.com", - "scientificMetadata": []map[string]map[string]string{{"sample": {"description": "", "name": "", "principalInvestigator": ""}}}, - "sourceFolder": "/usr/share/gnome", - "sourceFolderHost": "PC162.psi.ch", - "type": "raw", + "accessGroups": []string{}, + "contactEmail": "testuser@example.com", + "creationLocation": "/PSI/", + "creationTime": "2300-01-01T11:11:11.000Z", + "datasetName": "CMakeCache", + "description": "", + "endTime": "2300-01-01T11:11:11.000Z", + "owner": "first last", + "ownerEmail": "test@example.com", + "ownerGroup": "group1", + "principalInvestigator": "test@example.com", + "scientificMetadata": []map[string]map[string]string{{"sample": {"description": "", "name": "", "principalInvestigator": ""}}}, + "sourceFolder": "/usr/share/gnome", + "sourceFolderHost": "PC162.psi.ch", + "type": "raw", } - + // Mock datafiles datafiles := []Datafile{ {Size: 100}, @@ -84,23 +88,90 @@ func TestSendIngestCommand(t *testing.T) { {Size: 300}, {Size: 400}, } - + // Create a mock server server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) { - // Respond with a fixed dataset ID when a new dataset is created - if strings.HasPrefix(req.URL.Path, "/RawDatasets") || strings.HasPrefix(req.URL.Path, "/DerivedDatasets") || strings.HasPrefix(req.URL.Path, "/Datasets") { - rw.Write([]byte(`{"pid": "test-dataset-id"}`)) - } else { - // Respond with a 200 status code when a new data block is created - rw.WriteHeader(http.StatusOK) - } + // Respond with a fixed dataset ID when a new dataset is created + if strings.HasPrefix(req.URL.Path, "/RawDatasets") || strings.HasPrefix(req.URL.Path, "/DerivedDatasets") || strings.HasPrefix(req.URL.Path, "/Datasets") { + rw.Write([]byte(`{"pid": "test-dataset-id"}`)) + } else { + // Respond with a 200 status code when a new data block is created + rw.WriteHeader(http.StatusOK) + } })) // Close the server when test finishes defer server.Close() - + // Call SendIngestCommand function with the mock server's URL and check the returned dataset ID datasetId := SendIngestCommand(client, server.URL, metaDataMap, datafiles, user) if datasetId != "test-dataset-id" { t.Errorf("Expected dataset id 'test-dataset-id', but got '%s'", datasetId) } } + +func TestGetEndpoint(t *testing.T) { + // Redirect log output to a buffer + var buf bytes.Buffer + log.SetOutput(&buf) + defer func() { + log.SetOutput(os.Stderr) + }() + + testCases := []struct { + dsType string + want string + }{ + {"raw", "/RawDatasets"}, + {"derived", "/DerivedDatasets"}, + {"base", "/Datasets"}, + {"unknown", ""}, + } + + for _, tc := range testCases { + got, err := getEndpoint(tc.dsType) + if err != nil && tc.dsType != "unknown" { + t.Errorf("getEndpoint(%q) returned unexpected error: %v", tc.dsType, err) + } + if got != tc.want { + t.Errorf("getEndpoint(%q) = %q; want %q", tc.dsType, got, tc.want) + } + if tc.dsType == "unknown" && err == nil { + t.Errorf("Expected error for unknown dataset type not found") + } + buf.Reset() + } +} + +func TestSendRequest(t *testing.T) { + // Create a test server + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + defer ts.Close() + + // Create a test client + client := &http.Client{} + + // Call the sendRequest function + resp := sendRequest(client, "GET", ts.URL, nil) + + // Check the response + if resp.StatusCode != http.StatusOK { + t.Errorf("sendRequest() returned status %d; want %d", resp.StatusCode, http.StatusOK) + } +} + +func TestDecodePid(t *testing.T) { + // Create a test response + resp := &http.Response{ + Body: io.NopCloser(strings.NewReader(`{"pid": "12345"}`)), + } + + // Call the decodePid function + pid := decodePid(resp) + + // Check the returned pid + if pid != "12345" { + t.Errorf("decodePid() returned pid %q; want %q", pid, "12345") + } +} From 722c0d6af2a42a9e7c4b438e693ace02ac05e64f Mon Sep 17 00:00:00 2001 From: "Ali R. Vahdati" Date: Wed, 24 Apr 2024 13:10:53 +0200 Subject: [PATCH 4/8] Add more tests --- datasetIngestor/sendIngestCommand.go | 17 +-- datasetIngestor/sendIngestCommand_test.go | 145 ++++++++++++++++------ 2 files changed, 118 insertions(+), 44 deletions(-) diff --git a/datasetIngestor/sendIngestCommand.go b/datasetIngestor/sendIngestCommand.go index 41c46d1..009d670 100644 --- a/datasetIngestor/sendIngestCommand.go +++ b/datasetIngestor/sendIngestCommand.go @@ -5,6 +5,7 @@ import ( "encoding/json" "log" "net/http" + "fmt" ) type FileBlock struct { @@ -89,7 +90,10 @@ func createDataset(client *http.Client, APIServer string, metaDataMap map[string if val, ok := metaDataMap["type"]; ok { dstype := val.(string) - endpoint := getEndpoint(dstype) + endpoint, err := getEndpoint(dstype) + if err != nil { + log.Fatal(err) + } myurl := APIServer + endpoint + "/?access_token=" + user["accessToken"] resp := sendRequest(client, "POST", myurl, cmm) defer resp.Body.Close() @@ -107,17 +111,16 @@ func createDataset(client *http.Client, APIServer string, metaDataMap map[string return datasetId } -func getEndpoint(dstype string) string { +func getEndpoint(dstype string) (string, error) { switch dstype { case "raw": - return "/RawDatasets" + return "/RawDatasets", nil case "derived": - return "/DerivedDatasets" + return "/DerivedDatasets", nil case "base": - return "/Datasets" + return "/Datasets", nil default: - log.Fatal("Unknown dataset type encountered:", dstype) - return "" + return "", fmt.Errorf("Unknown dataset type encountered: %s", dstype) } } diff --git a/datasetIngestor/sendIngestCommand_test.go b/datasetIngestor/sendIngestCommand_test.go index 299c713..7c65d08 100644 --- a/datasetIngestor/sendIngestCommand_test.go +++ b/datasetIngestor/sendIngestCommand_test.go @@ -1,11 +1,15 @@ package datasetIngestor import ( + "bytes" + "log" "net/http" "net/http/httptest" + "os" + "strings" "testing" "time" - "strings" + "io" ) func TestCreateOrigBlock(t *testing.T) { @@ -16,20 +20,20 @@ func TestCreateOrigBlock(t *testing.T) { {Size: 300}, {Size: 400}, } - + // Call createOrigBlock function block := createOrigBlock(1, 3, datafiles, "test-dataset") - + // Check the Size of the returned FileBlock if block.Size != 500 { t.Errorf("Expected block size of 500, but got %d", block.Size) } - + // Check the length of DataFileList in the returned FileBlock if len(block.DataFileList) != 2 { t.Errorf("Expected 2 datafiles in the block, but got %d", len(block.DataFileList)) } - + // Check the DatasetId of the returned FileBlock if block.DatasetId != "test-dataset" { t.Errorf("Expected dataset id of 'test-dataset', but got %s", block.DatasetId) @@ -39,7 +43,7 @@ func TestCreateOrigBlock(t *testing.T) { func TestSendIngestCommand(t *testing.T) { // Mock HTTP client client := &http.Client{ - Timeout: 5 * time.Second, // Set a timeout for requests + Timeout: 5 * time.Second, // Set a timeout for requests Transport: &http.Transport{ // Customize the transport settings if needed (e.g., proxy, TLS config) // For a dummy client, default settings are usually sufficient @@ -50,33 +54,33 @@ func TestSendIngestCommand(t *testing.T) { return http.ErrUseLastResponse // Use the last response for redirects }, } - + // Mock user map user := map[string]string{ - "displayName": "csaxsswissfel", - "mail": "testuser@example.com", - "accessToken": "test-access-token", - } - + "displayName": "csaxsswissfel", + "mail": "testuser@example.com", + "accessToken": "test-access-token", + } + // Mock metaDataMap metaDataMap := map[string]interface{}{ - "accessGroups": []string{}, - "contactEmail": "testuser@example.com", - "creationLocation": "/PSI/", - "creationTime": "2300-01-01T11:11:11.000Z", - "datasetName": "CMakeCache", - "description": "", - "endTime": "2300-01-01T11:11:11.000Z", - "owner": "first last", - "ownerEmail": "test@example.com", - "ownerGroup": "group1", - "principalInvestigator": "test@example.com", - "scientificMetadata": []map[string]map[string]string{{"sample": {"description": "", "name": "", "principalInvestigator": ""}}}, - "sourceFolder": "/usr/share/gnome", - "sourceFolderHost": "PC162.psi.ch", - "type": "raw", + "accessGroups": []string{}, + "contactEmail": "testuser@example.com", + "creationLocation": "/PSI/", + "creationTime": "2300-01-01T11:11:11.000Z", + "datasetName": "CMakeCache", + "description": "", + "endTime": "2300-01-01T11:11:11.000Z", + "owner": "first last", + "ownerEmail": "test@example.com", + "ownerGroup": "group1", + "principalInvestigator": "test@example.com", + "scientificMetadata": []map[string]map[string]string{{"sample": {"description": "", "name": "", "principalInvestigator": ""}}}, + "sourceFolder": "/usr/share/gnome", + "sourceFolderHost": "PC162.psi.ch", + "type": "raw", } - + // Mock datafiles datafiles := []Datafile{ {Size: 100}, @@ -84,23 +88,90 @@ func TestSendIngestCommand(t *testing.T) { {Size: 300}, {Size: 400}, } - + // Create a mock server server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) { - // Respond with a fixed dataset ID when a new dataset is created - if strings.HasPrefix(req.URL.Path, "/RawDatasets") || strings.HasPrefix(req.URL.Path, "/DerivedDatasets") || strings.HasPrefix(req.URL.Path, "/Datasets") { - rw.Write([]byte(`{"pid": "test-dataset-id"}`)) - } else { - // Respond with a 200 status code when a new data block is created - rw.WriteHeader(http.StatusOK) - } + // Respond with a fixed dataset ID when a new dataset is created + if strings.HasPrefix(req.URL.Path, "/RawDatasets") || strings.HasPrefix(req.URL.Path, "/DerivedDatasets") || strings.HasPrefix(req.URL.Path, "/Datasets") { + rw.Write([]byte(`{"pid": "test-dataset-id"}`)) + } else { + // Respond with a 200 status code when a new data block is created + rw.WriteHeader(http.StatusOK) + } })) // Close the server when test finishes defer server.Close() - + // Call SendIngestCommand function with the mock server's URL and check the returned dataset ID datasetId := SendIngestCommand(client, server.URL, metaDataMap, datafiles, user) if datasetId != "test-dataset-id" { t.Errorf("Expected dataset id 'test-dataset-id', but got '%s'", datasetId) } } + +func TestGetEndpoint(t *testing.T) { + // Redirect log output to a buffer + var buf bytes.Buffer + log.SetOutput(&buf) + defer func() { + log.SetOutput(os.Stderr) + }() + + testCases := []struct { + dsType string + want string + }{ + {"raw", "/RawDatasets"}, + {"derived", "/DerivedDatasets"}, + {"base", "/Datasets"}, + {"unknown", ""}, + } + + for _, tc := range testCases { + got, err := getEndpoint(tc.dsType) + if err != nil && tc.dsType != "unknown" { + t.Errorf("getEndpoint(%q) returned unexpected error: %v", tc.dsType, err) + } + if got != tc.want { + t.Errorf("getEndpoint(%q) = %q; want %q", tc.dsType, got, tc.want) + } + if tc.dsType == "unknown" && err == nil { + t.Errorf("Expected error for unknown dataset type not found") + } + buf.Reset() + } +} + +func TestSendRequest(t *testing.T) { + // Create a test server + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + defer ts.Close() + + // Create a test client + client := &http.Client{} + + // Call the sendRequest function + resp := sendRequest(client, "GET", ts.URL, nil) + + // Check the response + if resp.StatusCode != http.StatusOK { + t.Errorf("sendRequest() returned status %d; want %d", resp.StatusCode, http.StatusOK) + } +} + +func TestDecodePid(t *testing.T) { + // Create a test response + resp := &http.Response{ + Body: io.NopCloser(strings.NewReader(`{"pid": "12345"}`)), + } + + // Call the decodePid function + pid := decodePid(resp) + + // Check the returned pid + if pid != "12345" { + t.Errorf("decodePid() returned pid %q; want %q", pid, "12345") + } +} From 689e6496a8c21d06f1258a977396a173db18b20e Mon Sep 17 00:00:00 2001 From: "Ali R. Vahdati" <3798865+kavir1698@users.noreply.github.com> Date: Thu, 16 May 2024 21:12:48 +0200 Subject: [PATCH 5/8] Add `TestCreateOrigDatablocks` --- datasetIngestor/sendIngestCommand.go | 18 +++++++- datasetIngestor/sendIngestCommand_test.go | 54 +++++++++++++++++++++++ 2 files changed, 71 insertions(+), 1 deletion(-) diff --git a/datasetIngestor/sendIngestCommand.go b/datasetIngestor/sendIngestCommand.go index 009d670..bb33d4f 100644 --- a/datasetIngestor/sendIngestCommand.go +++ b/datasetIngestor/sendIngestCommand.go @@ -152,7 +152,23 @@ func decodePid(resp *http.Response) string { return d.Pid } - + +/* createOrigDatablocks sends a series of POST requests to the server to create original data blocks. + +It divides the fullFileArray into blocks based on the BLOCK_MAXFILES and BLOCK_MAXBYTES constants, and sends a request for each block. + +Parameters: + +client: The HTTP client used to send the requests. +APIServer: The base URL of the API server. +fullFileArray: An array of Datafile objects representing the files in the dataset. +datasetId: The ID of the dataset. +user: A map containing user information. The "accessToken" key should contain the user's access token. + +If the total number of files exceeds TOTAL_MAXFILES, the function logs a fatal error. +If a request receives a response with a status code other than 200, the function logs a fatal error. + +The function logs a message for each created data block, including the start and end file, the total size, and the number of files in the block.*/ func createOrigDatablocks(client *http.Client, APIServer string, fullFileArray []Datafile, datasetId string, user map[string]string) { totalFiles := len(fullFileArray) diff --git a/datasetIngestor/sendIngestCommand_test.go b/datasetIngestor/sendIngestCommand_test.go index 7c65d08..2f25b84 100644 --- a/datasetIngestor/sendIngestCommand_test.go +++ b/datasetIngestor/sendIngestCommand_test.go @@ -175,3 +175,57 @@ func TestDecodePid(t *testing.T) { t.Errorf("decodePid() returned pid %q; want %q", pid, "12345") } } + +func TestCreateOrigDatablocks(t *testing.T) { + // keep track of the number of requests + var requestCount int + + // Create a mock HTTP server + server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) { + // Increment the request count for each request + requestCount++ + + // Check if the request method is POST + if req.Method != http.MethodPost { + t.Errorf("Expected POST request, got %s", req.Method) + } + + // Check if the request URL is correct + expectedURL := "/OrigDatablocks?access_token=testToken" + if req.URL.String() != expectedURL { + t.Errorf("Expected request to %s, got %s", expectedURL, req.URL.String()) + } + + rw.WriteHeader(http.StatusOK) + })) + // Close the server when test finishes + defer server.Close() + + // Create a mock HTTP client + client := server.Client() + + // Define test data + datafiles := []Datafile{ + { + Size: 100, + }, + { + Size: 200, + }, + { + Size: 900, + }, + } + user := map[string]string{ + "accessToken": "testToken", + } + + // Call the function with test data + createOrigDatablocks(client, server.URL, datafiles, "testDatasetId", user) + + // Check if the correct number of requests were made + expectedRequestCount := (len(datafiles) + BLOCK_MAXFILES - 1) / BLOCK_MAXFILES + if requestCount != expectedRequestCount { + t.Errorf("Expected %d requests, got %d", expectedRequestCount, requestCount) + } +} From c002893261d0a5bf41f79cc0975dac95f39d66ce Mon Sep 17 00:00:00 2001 From: "Ali R. Vahdati" <3798865+kavir1698@users.noreply.github.com> Date: Tue, 21 May 2024 13:37:31 +0200 Subject: [PATCH 6/8] Improve `TestCreateOrigDatablocks` --- datasetIngestor/sendIngestCommand_test.go | 113 ++++++++++++++-------- 1 file changed, 70 insertions(+), 43 deletions(-) diff --git a/datasetIngestor/sendIngestCommand_test.go b/datasetIngestor/sendIngestCommand_test.go index 2f25b84..e1d6f5d 100644 --- a/datasetIngestor/sendIngestCommand_test.go +++ b/datasetIngestor/sendIngestCommand_test.go @@ -177,55 +177,82 @@ func TestDecodePid(t *testing.T) { } func TestCreateOrigDatablocks(t *testing.T) { - // keep track of the number of requests - var requestCount int - - // Create a mock HTTP server - server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) { - // Increment the request count for each request - requestCount++ - - // Check if the request method is POST - if req.Method != http.MethodPost { - t.Errorf("Expected POST request, got %s", req.Method) - } - - // Check if the request URL is correct - expectedURL := "/OrigDatablocks?access_token=testToken" - if req.URL.String() != expectedURL { - t.Errorf("Expected request to %s, got %s", expectedURL, req.URL.String()) - } - - rw.WriteHeader(http.StatusOK) - })) - // Close the server when test finishes - defer server.Close() - - // Create a mock HTTP client - client := server.Client() - - // Define test data - datafiles := []Datafile{ + // Define test cases + testCases := []struct { + name string + blockMaxFiles int + datafiles []Datafile + expectedRequests int + }{ { - Size: 100, + name: "Case 1: BLOCK_MAXFILES > len(datafiles)", + blockMaxFiles: 20000, + datafiles: make([]Datafile, 10000), + expectedRequests: 1, }, { - Size: 200, + name: "Case 2: BLOCK_MAXFILES < len(datafiles)", + blockMaxFiles: 5000, + datafiles: makeDatafiles(10000, BLOCK_MAXBYTES/5000 + 1), // Create 10000 data files with size BLOCK_MAXBYTES/5000 + 1 + expectedRequests: 2, }, { - Size: 900, + name: "Case 3: BLOCK_MAXFILES = len(datafiles)", + blockMaxFiles: 10000, + datafiles: make([]Datafile, 10000), + expectedRequests: 1, }, } - user := map[string]string{ - "accessToken": "testToken", - } - - // Call the function with test data - createOrigDatablocks(client, server.URL, datafiles, "testDatasetId", user) - - // Check if the correct number of requests were made - expectedRequestCount := (len(datafiles) + BLOCK_MAXFILES - 1) / BLOCK_MAXFILES - if requestCount != expectedRequestCount { - t.Errorf("Expected %d requests, got %d", expectedRequestCount, requestCount) + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Keep track of the number of requests + var numRequests int + + // Create a mock HTTP server + server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) { + // Increment the request count for each request + numRequests++ + + // Check if the request method is POST + if req.Method != http.MethodPost { + t.Errorf("Expected POST request, got %s", req.Method) + } + + // Check if the request URL is correct + expectedURL := "/OrigDatablocks?access_token=testToken" + if req.URL.String() != expectedURL { + t.Errorf("Expected request to %s, got %s", expectedURL, req.URL.String()) + } + + rw.WriteHeader(http.StatusOK) + })) + // Close the server when test finishes + defer server.Close() + + // Create a mock HTTP client + client := server.Client() + + // Define user data + user := map[string]string{ + "accessToken": "testToken", + } + + // Call the function with test data + createOrigDatablocks(client, server.URL, tc.datafiles, "testDatasetId", user) + + // Check if the correct number of requests were made + if numRequests != tc.expectedRequests { + t.Errorf("Expected %d requests, got %d", tc.expectedRequests, numRequests) + } + }) } } + +func makeDatafiles(numFiles, size int) []Datafile { + datafiles := make([]Datafile, numFiles) + for i := range datafiles { + datafiles[i] = Datafile{Size: int64(size)} + } + return datafiles +} \ No newline at end of file From 9ec8fb8818ec458bb7718062adecb15fe05f3573 Mon Sep 17 00:00:00 2001 From: "Ali R. Vahdati" <3798865+kavir1698@users.noreply.github.com> Date: Tue, 21 May 2024 14:57:23 +0200 Subject: [PATCH 7/8] Remove `blockMaxFiles` from test cases --- datasetIngestor/sendIngestCommand_test.go | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/datasetIngestor/sendIngestCommand_test.go b/datasetIngestor/sendIngestCommand_test.go index e1d6f5d..50f1b39 100644 --- a/datasetIngestor/sendIngestCommand_test.go +++ b/datasetIngestor/sendIngestCommand_test.go @@ -180,26 +180,22 @@ func TestCreateOrigDatablocks(t *testing.T) { // Define test cases testCases := []struct { name string - blockMaxFiles int datafiles []Datafile expectedRequests int }{ { name: "Case 1: BLOCK_MAXFILES > len(datafiles)", - blockMaxFiles: 20000, - datafiles: make([]Datafile, 10000), + datafiles: makeDatafiles(10000, BLOCK_MAXBYTES/10000), expectedRequests: 1, }, { name: "Case 2: BLOCK_MAXFILES < len(datafiles)", - blockMaxFiles: 5000, - datafiles: makeDatafiles(10000, BLOCK_MAXBYTES/5000 + 1), // Create 10000 data files with size BLOCK_MAXBYTES/5000 + 1 - expectedRequests: 2, + datafiles: makeDatafiles(40000, BLOCK_MAXBYTES/10000), + expectedRequests: 4, }, { name: "Case 3: BLOCK_MAXFILES = len(datafiles)", - blockMaxFiles: 10000, - datafiles: make([]Datafile, 10000), + datafiles: makeDatafiles(20000, BLOCK_MAXBYTES/20000), expectedRequests: 1, }, } @@ -255,4 +251,4 @@ func makeDatafiles(numFiles, size int) []Datafile { datafiles[i] = Datafile{Size: int64(size)} } return datafiles -} \ No newline at end of file +} From 03a8c171a2c6e2e38856c18ad0a083d57b55962b Mon Sep 17 00:00:00 2001 From: "Ali R. Vahdati" <3798865+kavir1698@users.noreply.github.com> Date: Tue, 21 May 2024 15:42:21 +0200 Subject: [PATCH 8/8] Simplify metaDataMap and user --- datasetIngestor/sendIngestCommand_test.go | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/datasetIngestor/sendIngestCommand_test.go b/datasetIngestor/sendIngestCommand_test.go index 50f1b39..74a59c8 100644 --- a/datasetIngestor/sendIngestCommand_test.go +++ b/datasetIngestor/sendIngestCommand_test.go @@ -57,28 +57,12 @@ func TestSendIngestCommand(t *testing.T) { // Mock user map user := map[string]string{ - "displayName": "csaxsswissfel", - "mail": "testuser@example.com", - "accessToken": "test-access-token", + "displayName": "test user", } // Mock metaDataMap metaDataMap := map[string]interface{}{ - "accessGroups": []string{}, - "contactEmail": "testuser@example.com", - "creationLocation": "/PSI/", - "creationTime": "2300-01-01T11:11:11.000Z", - "datasetName": "CMakeCache", - "description": "", - "endTime": "2300-01-01T11:11:11.000Z", - "owner": "first last", - "ownerEmail": "test@example.com", - "ownerGroup": "group1", - "principalInvestigator": "test@example.com", - "scientificMetadata": []map[string]map[string]string{{"sample": {"description": "", "name": "", "principalInvestigator": ""}}}, - "sourceFolder": "/usr/share/gnome", - "sourceFolderHost": "PC162.psi.ch", - "type": "raw", + "type": "raw", } // Mock datafiles