Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test and refactor GetArchivableDatasets #60

Merged
merged 3 commits into from
May 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 78 additions & 29 deletions datasetUtils/getArchivableDatasets.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@ package datasetUtils
import (
"encoding/json"
"github.com/fatih/color"
"io/ioutil"
"io"
"log"
"net/http"
"net/url"
"strings"
"fmt"
)

type DatasetInfo struct {
Expand All @@ -20,56 +21,100 @@ type QueryResult []DatasetInfo
// function that assembles the datasetIds to be fetched in chunks
// see https://blog.golang.org/slices for explanation why datasetList slice should be a return parameter

func addResult(client *http.Client, APIServer string, filter string, accessToken string, datasetList []string) []string {
/*
addResult is a helper function that sends a GET request to the API server to fetch dataset details and appends the IDs of the datasets that are archivable to the datasetList.

Parameters:
- client: An instance of http.Client used to send the request.
- APIServer: The URL of the API server.
- filter: The filter query to be used in the GET request.
- accessToken: The access token used for authentication.
- datasetList: The list of dataset IDs to which the IDs of the archivable datasets will be appended.

The function first constructs the URL for the GET request by appending the filter and access token to the APIServer URL. It then sends the GET request and reads the response.

If the status code of the response is 200, the function reads the body of the response and unmarshals it into a QueryResult object. It then iterates over the datasets in the QueryResult. If a dataset's size is greater than 0, the function logs the dataset's details and appends its ID to the datasetList. If a dataset's size is 0, the function logs the dataset's details in red and does not append its ID to the datasetList.

If the status code of the response is not 200, the function returns an error that includes the unexpected status code and does not read the response body.

The function returns the updated datasetList.

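Note: The function does not terminate the program. If it fails to send the GET request, read the response body, or unmarshal the response body, it returns a nil list together with a wrapped error, and the caller decides how to handle it.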
*/
func addResult(client *http.Client, APIServer string, filter string, accessToken string, datasetList []string) ([]string, error) {
v := url.Values{}
v.Set("filter", filter)
v.Add("access_token", accessToken)

var myurl = APIServer + "/Datasets?" + v.Encode()
//fmt.Println("Url:", myurl)
myurl := fmt.Sprintf("%s/Datasets?%s", APIServer, v.Encode())

resp, err := client.Get(myurl)
if err != nil {
log.Fatal("Get dataset details failed:", err)
return nil, fmt.Errorf("get dataset details failed: %w", err)
}
defer resp.Body.Close()

if resp.StatusCode == 200 {
body, _ := ioutil.ReadAll(resp.Body)
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
}

var respObj QueryResult
err = json.Unmarshal(body, &respObj)
if err != nil {
log.Fatal(err)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("read response body failed: %w", err)
}

if len(respObj) > 0 {
log.Printf("Found the following datasets in state archivable: (size=0 datasets are removed)")
var item DatasetInfo
for _, item = range respObj {
if item.Size > 0 {
log.Printf("Folder: %v, size: %v, PID: %v", item.SourceFolder, item.Size, item.Pid)
datasetList = append(datasetList, item.Pid)
} else {
color.Set(color.FgRed)
log.Printf("Folder: %v, size: %v, PID: %v will be ignored !", item.SourceFolder, item.Size, item.Pid)
color.Unset()
}
var respObj QueryResult
err = json.Unmarshal(body, &respObj)
if err != nil {
return nil, fmt.Errorf("unmarshal response body failed: %w", err)
}

if len(respObj) > 0 {
log.Printf("Found the following datasets in state archivable: (size=0 datasets are removed)")
for _, item := range respObj {
if item.Size > 0 {
log.Printf("Folder: %v, size: %v, PID: %v", item.SourceFolder, item.Size, item.Pid)
datasetList = append(datasetList, item.Pid)
} else {
color.Set(color.FgRed)
log.Printf("Folder: %v, size: %v, PID: %v will be ignored !", item.SourceFolder, item.Size, item.Pid)
color.Unset()
}
}
} else {
log.Printf("Statuscode:%v", resp.StatusCode)
}
return datasetList

return datasetList, nil
}

/*
GetArchivableDatasets retrieves a list of datasets that are eligible for archiving.

Parameters:
- client: An instance of http.Client used to send the request.
- APIServer: The URL of the API server.
- ownerGroup: The owner group of the datasets. If this is not empty, the function will fetch datasets belonging to this owner group. If it is empty, the function will fetch datasets based on the inputdatasetList.
- inputdatasetList: A list of dataset IDs. This is used only if ownerGroup is empty.
- accessToken: The access token used for authentication.

The function first checks if the ownerGroup is not empty. If it is not, it constructs a filter query to fetch datasets belonging to this owner group that are archivable. It then calls the addResult function to send the request and process the response.

If the ownerGroup is empty, the function splits the inputdatasetList into chunks and for each chunk, it constructs a filter query to fetch datasets with IDs in the chunk that are archivable. It then calls the addResult function for each chunk.

The function returns a list of dataset IDs that are archivable.

Note: A dataset is considered archivable if its size is greater than 0.
*/
func GetArchivableDatasets(client *http.Client, APIServer string, ownerGroup string, inputdatasetList []string, accessToken string) (datasetList []string) {
datasetList = make([]string, 0)

filter := ""
if ownerGroup != "" {
filter = `{"where":{"ownerGroup":"` + ownerGroup + `","datasetlifecycle.archivable":true},"fields": {"pid":1,"size":1,"sourceFolder":1}}`
datasetList = addResult(client, APIServer, filter, accessToken, datasetList)
var err error
datasetList, err = addResult(client, APIServer, filter, accessToken, datasetList)
if err != nil {
log.Fatalf("Error: %v", err)
}
} else {
// split large request into chunks
chunkSize := 100
Expand All @@ -80,7 +125,11 @@ func GetArchivableDatasets(client *http.Client, APIServer string, ownerGroup str
}
quotedList := strings.Join(inputdatasetList[i:end], "\",\"")
filter = `{"where":{"pid":{"inq":["` + quotedList + `"]},"datasetlifecycle.archivable":true},"fields": {"pid":1,"size":1,"sourceFolder":1}}`
datasetList = addResult(client, APIServer, filter, accessToken, datasetList)
var err error
datasetList, err = addResult(client, APIServer, filter, accessToken, datasetList)
if err != nil {
log.Fatalf("Error: %v", err)
}
}
}
return datasetList
Expand Down
124 changes: 124 additions & 0 deletions datasetUtils/getArchivableDatasets_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
package datasetUtils

import (
"net/http"
"net/http/httptest"
"testing"
)

// TestGetArchivableDatasets runs GetArchivableDatasets against a mock HTTP
// server in its two lookup modes: one case where ownerGroup is provided and
// one where inputdatasetList is provided. In both cases the mock server
// returns three datasets, one of which has size 0 and must be left out of the
// result.
func TestGetArchivableDatasets(t *testing.T) {
	tests := []struct {
		name             string
		serverResponse   string
		ownerGroup       string
		inputdatasetList []string
		expected         []string
	}{
		{
			name:             "Test with ownerGroup",
			serverResponse:   `[{"pid":"1","sourceFolder":"folder1","size":10},{"pid":"2","sourceFolder":"folder2","size":0},{"pid":"3","sourceFolder":"folder3","size":20}]`,
			ownerGroup:       "testGroup",
			inputdatasetList: []string{},
			expected:         []string{"1", "3"},
		},
		{
			name:             "Test without ownerGroup",
			serverResponse:   `[{"pid":"1","sourceFolder":"folder1","size":10},{"pid":"2","sourceFolder":"folder2","size":0},{"pid":"3","sourceFolder":"folder3","size":20}]`,
			ownerGroup:       "",
			inputdatasetList: []string{"1", "2", "3"},
			expected:         []string{"1", "3"},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Create a mock HTTP server that always answers with the canned response.
			server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
				if _, err := rw.Write([]byte(tt.serverResponse)); err != nil {
					t.Errorf("writing mock response failed: %v", err)
				}
			}))
			// Close the server when the test finishes.
			defer server.Close()

			// Use Client & URL from our local test server.
			client := server.Client()
			APIServer := server.URL
			accessToken := "testToken"

			// Call our function.
			datasetList := GetArchivableDatasets(client, APIServer, tt.ownerGroup, tt.inputdatasetList, accessToken)

			// Stop here on a length mismatch: the element-wise comparison below
			// indexes tt.expected and would panic if the result were longer.
			if len(datasetList) != len(tt.expected) {
				t.Fatalf("Expected length %v but got %v (result: %v)", len(tt.expected), len(datasetList), datasetList)
			}

			for i, v := range datasetList {
				if v != tt.expected[i] {
					t.Errorf("Expected %v but got %v", tt.expected[i], v)
				}
			}
		})
	}
}

// TestAddResult runs addResult against a mock HTTP server. It includes two
// test cases: one where the server response includes datasets that are
// archivable (size > 0), and one where none of the datasets are archivable.
// The mock handler also verifies that the request targets /Datasets and
// carries the filter and access token as query parameters.
func TestAddResult(t *testing.T) {
	tests := []struct {
		name           string
		serverResponse string
		filter         string
		datasetList    []string
		expected       []string
	}{
		{
			name:           "Test with archivable datasets",
			serverResponse: `[{"pid":"1","sourceFolder":"folder1","size":10},{"pid":"2","sourceFolder":"folder2","size":0},{"pid":"3","sourceFolder":"folder3","size":20}]`,
			filter:         "size>0",
			datasetList:    []string{},
			expected:       []string{"1", "3"},
		},
		{
			name:           "Test without archivable datasets",
			serverResponse: `[{"pid":"1","sourceFolder":"folder1","size":0},{"pid":"2","sourceFolder":"folder2","size":0},{"pid":"3","sourceFolder":"folder3","size":0}]`,
			filter:         "size>0",
			datasetList:    []string{},
			expected:       []string{},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			const accessToken = "testToken"

			// Create a mock HTTP server that checks the incoming request and
			// answers with the canned response.
			server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
				if req.URL.Path != "/Datasets" {
					t.Errorf("Expected request path /Datasets but got %v", req.URL.Path)
				}
				query := req.URL.Query()
				if got := query.Get("filter"); got != tt.filter {
					t.Errorf("Expected filter %q but got %q", tt.filter, got)
				}
				if got := query.Get("access_token"); got != accessToken {
					t.Errorf("Expected access_token %q but got %q", accessToken, got)
				}
				if _, err := rw.Write([]byte(tt.serverResponse)); err != nil {
					t.Errorf("writing mock response failed: %v", err)
				}
			}))
			// Close the server when the test finishes.
			defer server.Close()

			// Use Client & URL from our local test server.
			client := server.Client()
			APIServer := server.URL

			// Call our function. On error the returned list is meaningless, so
			// abort this case instead of comparing it.
			datasetList, err := addResult(client, APIServer, tt.filter, accessToken, tt.datasetList)
			if err != nil {
				t.Fatalf("Error: %v", err)
			}

			// Stop here on a length mismatch: the element-wise comparison below
			// indexes tt.expected and would panic if the result were longer.
			if len(datasetList) != len(tt.expected) {
				t.Fatalf("Expected length %v but got %v (result: %v)", len(tt.expected), len(datasetList), datasetList)
			}

			for i, v := range datasetList {
				if v != tt.expected[i] {
					t.Errorf("Expected %v but got %v", tt.expected[i], v)
				}
			}
		})
	}
}
Loading