diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 204c2de..efd3443 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -19,7 +19,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v4 with: - go-version: '1.20' + go-version: '1.21' - name: Install dependencies run: go get . diff --git a/convert/config.go b/convert/config.go index 7d3ca0c..763762f 100644 --- a/convert/config.go +++ b/convert/config.go @@ -1,7 +1,6 @@ package bedgovcf import ( - "fmt" "log" "os" "strings" @@ -28,62 +27,55 @@ func ReadConfig(configString string) Config { // Validate the config func (c *Config) validate() { - if c.Chrom.Field == "" { - log.Printf("No field defined for CHROM, defaulting to the column 0") - c.Chrom.Field = "0" + if c.Chrom.Value == "" { + log.Printf("No value defined for CHROM, defaulting to the column 0") + c.Chrom.Value = "$0" } - if c.Pos.Field == "" { - log.Printf("No field defined for POS, defaulting to the column 1") - c.Pos.Field = "1" + if c.Pos.Value == "" { + log.Printf("No value defined for POS, defaulting to the column 1") + c.Pos.Value = "$1" } - if c.Id.Field == "" && c.Id.Prefix == "" { - log.Printf("No field or prefix specified for the ID, defaulting to prefix 'id_") + if c.Id.Value == "" && c.Id.Prefix == "" { + log.Printf("No value or prefix specified for the ID, defaulting to prefix 'id_") c.Id.Prefix = "id_" } - if c.Ref.Field == "" && c.Ref.Value == "" { - log.Printf("No field or value specified for the REF, defaulting to value 'N") + if c.Ref.Value == "" { + log.Printf("No value specified for the REF, defaulting to value 'N") c.Ref.Value = "N" } - if c.Alt.Field == "" && c.Alt.Value == "" { - log.Printf("No field or value specified for the ALT, defaulting to value '") + if c.Alt.Value == "" { + log.Printf("No value specified for the ALT, defaulting to value '") c.Alt.Value = "" } - if c.Qual.Field == "" && c.Qual.Value == "" { - log.Printf("No field or value specified for the QUAL, defaulting to value '.'") + if c.Qual.Value == "" { + log.Printf("No value specified for the QUAL, defaulting to value '.'") c.Qual.Value = "." } - if c.Filter.Field == "" && c.Filter.Value == "" { - log.Printf("No field or value specified for the FILTER, defaulting to value 'PASS'") + if c.Filter.Value == "" { + log.Printf("No value specified for the FILTER, defaulting to value 'PASS'") c.Filter.Value = "PASS" } - errStrings := []string{} - if len(c.Info) != 0 { for k, v := range c.Info { - if v.Field == "" && v.Value == "" { - s := fmt.Sprintf("No field or value specified for the INFO/%v", strings.ToUpper(k)) - errStrings = append(errStrings, s) + if v.Value == "" { + log.Printf("No value specified for the INFO/%v", strings.ToUpper(k)) } } } if len(c.Format) != 0 { for k, v := range c.Format { - if v.Field == "" && v.Value == "" { - s := fmt.Sprintf("No field or value specified for the FORMAT/%v", strings.ToUpper(k)) - errStrings = append(errStrings, s) + if v.Value == "" { + log.Printf("No value specified for the FORMAT/%v", strings.ToUpper(k)) } } } - if len(errStrings) != 0 { - log.Fatalf("The following errors were found in the config file:\n%v", strings.Join(errStrings, "\n")) - } } diff --git a/convert/resolve.go b/convert/resolve.go new file mode 100644 index 0000000..f647819 --- /dev/null +++ b/convert/resolve.go @@ -0,0 +1,143 @@ +package bedgovcf + +import ( + "fmt" + "log" + "math" + "slices" + "strconv" + "strings" +) + +func resolveField(configValues []string, bedValues []string, bedHeader []string) string { + + input := []string{} + for _, v := range configValues { + if strings.HasPrefix(v, "$") { + var headerIndex int + for j, w := range bedHeader { + if w == v[1:] { + headerIndex = j + break + } + } + input = append(input, bedValues[headerIndex]) + continue + } else { + input = append(input, v) + } + } + + function := "" + if strings.HasPrefix(input[0], "~") { + function = configValues[0][1:] + } else { + return strings.Join(input, " ") + } + + switch function { + case "round": + // ~round + float, err := strconv.ParseFloat(input[1], 64) + if err != nil { + log.Fatalf("Failed to parse the value (%v) to a float: %v", input[1], err) + } + round := math.Round(float) + if round == -0 { + round = 0 + } + return fmt.Sprintf("%v", round) + case "sum": + // ~sum ... + var sum float64 + for _, v := range input[1:] { + float, err := strconv.ParseFloat(v, 64) + if err != nil { + log.Fatalf("Failed to parse the value (%v) to a float: %v", v, err) + } + sum += float + } + + return strconv.FormatFloat(sum, 'g', -1, 64) + case "min": + // ~min ... + min, err := strconv.ParseFloat(input[1], 64) + if err != nil { + log.Fatalf("Failed to parse the value (%v) to a float: %v", input[1], err) + } + for _, v := range input[2:] { + float, err := strconv.ParseFloat(v, 64) + if err != nil { + log.Fatalf("Failed to parse the value (%v) to a float: %v", v, err) + } + min -= float + } + return strconv.FormatFloat(min, 'g', -1, 64) + case "if": + // ~if + // supported operators: > < >= <= == + v1 := input[1] + operator := input[2] + v2 := input[3] + vTrue := input[4] + vFalse := input[5:] + + floatV1, err1 := strconv.ParseFloat(v1, 64) + floatV2, err2 := strconv.ParseFloat(v2, 64) + + floatOperators := []string{"<", ">", "<=", ">="} + if slices.Contains(floatOperators, operator) && (err1 != nil || err2 != nil) { + log.Fatalf("Failed to parse the values (%v and %v) to a float: %v and %v", v1, v2, err1, err2) + } + + vFalseResolved := "" + if strings.HasPrefix(vFalse[0], "~") { + vFalseResolved = resolveField(vFalse, bedValues, bedHeader) + } else { + vFalseResolved = strings.Join(vFalse, " ") + } + + switch operator { + case "<": + if floatV1 < floatV2 { + return vTrue + } else { + return vFalseResolved + } + case ">": + if floatV1 > floatV2 { + return vTrue + } else { + return vFalseResolved + } + case ">=": + if floatV1 >= floatV2 { + return vTrue + } else { + return vFalseResolved + } + case "<=": + if floatV1 <= floatV2 { + return vTrue + } else { + return vFalseResolved + } + case "==": + if err1 == nil && err2 == nil { + if floatV1 == floatV2 { + return vTrue + } else { + return vFalseResolved + } + } else { + if v1 == v2 { + return vTrue + } else { + return vFalseResolved + } + } + } + } + + return "" +} diff --git a/convert/resolve_test.go b/convert/resolve_test.go new file mode 100644 index 0000000..5300733 --- /dev/null +++ b/convert/resolve_test.go @@ -0,0 +1,158 @@ +package bedgovcf + +import "testing" + +func TestFieldResolving(t *testing.T) { + value := resolveField([]string{"$test"}, []string{"value"}, []string{"test"}) + if value != "value" { + t.Fatalf("Expected value to be 'value', got %s", value) + } + + value = resolveField([]string{"$test", "$test2"}, []string{"value", "I don't want this", "value2"}, []string{"test", "whut", "test2"}) + if value != "value value2" { + t.Fatalf("Expected value to be 'value value2', got %s", value) + } + + value = resolveField([]string{"~sum", "$test", "$test2"}, []string{"10", "I don't want this", "2"}, []string{"test", "whut", "test2"}) + if value != "12" { + t.Fatalf("Expected value to be '12', got %s", value) + } +} + +func TestRound(t *testing.T) { + value := resolveField([]string{"~round", "1.5"}, []string{}, []string{}) + if value != "2" { + t.Fatalf("Expected value to be '2', got %s", value) + } + + value = resolveField([]string{"~round", "1.4"}, []string{}, []string{}) + if value != "1" { + t.Fatalf("Expected value to be '1', got %s", value) + } + + value = resolveField([]string{"~round", "-2.3"}, []string{}, []string{}) + if value != "-2" { + t.Fatalf("Expected value to be '-2', got %s", value) + } + + value = resolveField([]string{"~round", "-0.2"}, []string{}, []string{}) + if value != "0" { + t.Fatalf("Expected value to be '0', got %s", value) + } +} + +func TestSum(t *testing.T) { + value := resolveField([]string{"~sum", "1.5"}, []string{}, []string{}) + if value != "1.5" { + t.Fatalf("Expected value to be '1.5', got %s", value) + } + + value = resolveField([]string{"~sum", "1", "2"}, []string{}, []string{}) + if value != "3" { + t.Fatalf("Expected value to be '3', got %s", value) + } + + value = resolveField([]string{"~sum", "-2", "2"}, []string{}, []string{}) + if value != "0" { + t.Fatalf("Expected value to be '0', got %s", value) + } + + value = resolveField([]string{"~sum", "10", "15", "20", "-5"}, []string{}, []string{}) + if value != "40" { + t.Fatalf("Expected value to be '40', got %s", value) + } + + value = resolveField([]string{"~sum", "-10", "-20", "5"}, []string{}, []string{}) + if value != "-25" { + t.Fatalf("Expected value to be '-25', got %s", value) + } +} + +func TestMin(t *testing.T) { + value := resolveField([]string{"~min", "1.5"}, []string{}, []string{}) + if value != "1.5" { + t.Fatalf("Expected value to be '1.5', got %s", value) + } + + value = resolveField([]string{"~min", "1", "2"}, []string{}, []string{}) + if value != "-1" { + t.Fatalf("Expected value to be '-1', got %s", value) + } + + value = resolveField([]string{"~min", "-2", "2"}, []string{}, []string{}) + if value != "-4" { + t.Fatalf("Expected value to be '-4', got %s", value) + } + + value = resolveField([]string{"~min", "50", "15", "20", "-5"}, []string{}, []string{}) + if value != "20" { + t.Fatalf("Expected value to be '20', got %s", value) + } + + value = resolveField([]string{"~min", "-10", "-20", "5"}, []string{}, []string{}) + if value != "5" { + t.Fatalf("Expected value to be '5', got %s", value) + } +} + +func TestIf(t *testing.T) { + value := resolveField([]string{"~if", "1", ">", "2", "true", "false"}, []string{}, []string{}) + if value != "false" { + t.Fatalf("Expected value to be 'false', got %s", value) + } + + value = resolveField([]string{"~if", "1", "<", "2", "true", "false"}, []string{}, []string{}) + if value != "true" { + t.Fatalf("Expected value to be 'true', got %s", value) + } + + value = resolveField([]string{"~if", "1", "<=", "2", "true", "false"}, []string{}, []string{}) + if value != "true" { + t.Fatalf("Expected value to be 'true', got %s", value) + } + + value = resolveField([]string{"~if", "2", "<=", "2", "true", "false"}, []string{}, []string{}) + if value != "true" { + t.Fatalf("Expected value to be 'true', got %s", value) + } + + value = resolveField([]string{"~if", "1", ">=", "2", "true", "false"}, []string{}, []string{}) + if value != "false" { + t.Fatalf("Expected value to be 'false', got %s", value) + } + + value = resolveField([]string{"~if", "2", ">=", "2", "true", "false"}, []string{}, []string{}) + if value != "true" { + t.Fatalf("Expected value to be 'true', got %s", value) + } + + value = resolveField([]string{"~if", "1", "==", "2", "true", "false"}, []string{}, []string{}) + if value != "false" { + t.Fatalf("Expected value to be 'false', got %s", value) + } + + value = resolveField([]string{"~if", "1", "==", "1", "true", "false"}, []string{}, []string{}) + if value != "true" { + t.Fatalf("Expected value to be 'true', got %s", value) + } + + value = resolveField([]string{"~if", "test", "==", "test2", "true", "false"}, []string{}, []string{}) + if value != "false" { + t.Fatalf("Expected value to be 'false', got %s", value) + } + + value = resolveField([]string{"~if", "test", "==", "test", "true", "false"}, []string{}, []string{}) + if value != "true" { + t.Fatalf("Expected value to be 'true', got %s", value) + } + + value = resolveField([]string{"~if", "test", "==", "test2", "true", "~sum", "10", "20"}, []string{}, []string{}) + if value != "30" { + t.Fatalf("Expected value to be '30', got %s", value) + } + + value = resolveField([]string{"~if", "test", "==", "test", "true", "~sum", "10", "20"}, []string{}, []string{}) + if value != "true" { + t.Fatalf("Expected value to be 'true', got %s", value) + } +} diff --git a/convert/structs.go b/convert/structs.go index 4e22411..9e7288e 100644 --- a/convert/structs.go +++ b/convert/structs.go @@ -27,7 +27,6 @@ type ConfigHeaderStruct struct { // The struct for the standard fields type ConfigStandardFieldStruct struct { - Field string // The field to use as value Value string // The value to use Prefix string // The prefix to add to each value Options []ConfigHeaderStruct // The different options possible (only for ALT and FILTER) @@ -38,7 +37,6 @@ type MapConfigInfoFormatStruct map[string]ConfigInfoFormatStruct // The struct for the info and format fields type ConfigInfoFormatStruct struct { - Field string // The field to use as value Value string // The value to use Prefix string // The prefix to add to each value Description string // The description of the field diff --git a/convert/vcf.go b/convert/vcf.go index ee9726b..43ea5a4 100644 --- a/convert/vcf.go +++ b/convert/vcf.go @@ -190,20 +190,8 @@ func (cifs *ConfigInfoFormatStruct) getValue(values []string, header []string) s prefix = cifs.Prefix } - // TODO write a resolve function for conditional fields - if cifs.Field != "" { - var headerIndex int - for i, v := range header { - if v == cifs.Field { - headerIndex = i - break - } - } - return prefix + values[headerIndex] - } else if cifs.Value != "" { - return prefix + cifs.Value - } - return "" + return prefix + resolveField(strings.Split(cifs.Value, " "), values, header) + } // Get the value for the given field based on the config @@ -213,20 +201,8 @@ func (csfs *ConfigStandardFieldStruct) getValue(values []string, header []string prefix = csfs.Prefix } - // TODO write a resolve function for conditional fields - if csfs.Field != "" { - var headerIndex int - for i, v := range header { - if v == csfs.Field { - headerIndex = i - break - } - } - return prefix + values[headerIndex] - } else if csfs.Value != "" { - return prefix + csfs.Value - } - return "" + return prefix + resolveField(strings.Split(csfs.Value, " "), values, header) + } // Write the VCF struct to stdout or a file @@ -238,8 +214,8 @@ func (v *Vcf) Write(cCtx *cli.Context) { if stdout { fmt.Print(v.Header.String()) - for _, variant := range v.Variants { - fmt.Print(variant.String()) + for count, variant := range v.Variants { + fmt.Print(variant.String(count)) } } else { file, err := os.Create(cCtx.String("output")) @@ -248,19 +224,21 @@ func (v *Vcf) Write(cCtx *cli.Context) { } defer file.Close() file.WriteString(v.Header.String()) - for _, variant := range v.Variants { - file.WriteString(variant.String()) + for count, variant := range v.Variants { + file.WriteString(variant.String(count)) } } } // Convert a variant to a string -func (v Variant) String() string { +func (v Variant) String(count int) string { + + id := fmt.Sprintf("%v%v", v.Id, count) variant := strings.Join([]string{ v.Chrom, v.Pos, - v.Id, + id, v.Ref, v.Alt, v.Qual, diff --git a/convert/vcf_test.go b/convert/vcf_test.go index a3363b2..c1003da 100644 --- a/convert/vcf_test.go +++ b/convert/vcf_test.go @@ -56,7 +56,7 @@ func TestSetHeaderLines(t *testing.T) { }, }, Alt: ConfigStandardFieldStruct{ - Field: "test", + Value: "$test", Options: []ConfigHeaderStruct{ { Name: "DUP", @@ -65,7 +65,7 @@ func TestSetHeaderLines(t *testing.T) { }, }, Filter: ConfigStandardFieldStruct{ - Field: "test", + Value: "$test", Options: []ConfigHeaderStruct{ { Name: "PASS", @@ -75,7 +75,7 @@ func TestSetHeaderLines(t *testing.T) { }, Info: MapConfigInfoFormatStruct{ "SVLEN": ConfigInfoFormatStruct{ - Field: "test", + Value: "$test", Description: "The length of the SV", Number: "1", Type: "Integer", @@ -83,7 +83,7 @@ func TestSetHeaderLines(t *testing.T) { }, Format: MapConfigInfoFormatStruct{ "GT": ConfigInfoFormatStruct{ - Field: "test", + Value: "$test", Description: "Genotype", Number: "1", Type: "String", @@ -142,7 +142,7 @@ func TestSetHeaderLines(t *testing.T) { func TestStandardGetValue(t *testing.T) { config := ConfigStandardFieldStruct{ - Field: "test", + Value: "$test", } header := []string{"test", "test2"} values := []string{"value", "I don't want this"} @@ -161,7 +161,7 @@ func TestStandardGetValue(t *testing.T) { } config = ConfigStandardFieldStruct{ - Field: "2", + Value: "$2", } header = []string{"0", "1", "2", "3"} values = []string{"value", "I don't want this", "this is the one", "definitely not this"} @@ -173,7 +173,7 @@ func TestStandardGetValue(t *testing.T) { func TestInfoFormatGetValue(t *testing.T) { config := ConfigInfoFormatStruct{ - Field: "test", + Value: "$test", } header := []string{"test", "test2"} values := []string{"value", "I don't want this"} @@ -192,7 +192,7 @@ func TestInfoFormatGetValue(t *testing.T) { } config = ConfigInfoFormatStruct{ - Field: "2", + Value: "$2", } header = []string{"0", "1", "2", "3"} values = []string{"value", "I don't want this", "this is the one", "definitely not this"} @@ -227,8 +227,8 @@ func TestVariantString(t *testing.T) { }, } - if variant.String() != "chr1\t123\ttest\tA\tC\t100\tPASS\tSVLEN=100\tGT\t0/1\n" { - t.Fatalf("Expected variant string to be 'chr1\t123\ttest\tA\tC\t100\tPASS\tSVLEN=100\tGT\t0/1\n', got '%s'", variant.String()) + if variant.String(1) != "chr1\t123\ttest1\tA\tC\t100\tPASS\tSVLEN=100\tGT\t0/1\n" { + t.Fatalf("Expected variant string to be 'chr1\t123\ttest\tA\tC\t100\tPASS\tSVLEN=100\tGT\t0/1\n', got '%s'", variant.String(1)) } variant = Variant{ @@ -265,8 +265,8 @@ func TestVariantString(t *testing.T) { }, } - if variant.String() != "chr1 123 test A C 100 PASS SVLEN=100;SVTYPE=DEL GT:CN 0/1:2\n" { - t.Fatalf("Expected variant string to be 'chr1 123 test A C 100 PASS SVLEN=100;SVTYPE=DEL GT:CN 0/1:2\n', got '%s'", variant.String()) + if variant.String(1) != "chr1 123 test1 A C 100 PASS SVLEN=100;SVTYPE=DEL GT:CN 0/1:2\n" { + t.Fatalf("Expected variant string to be 'chr1 123 test A C 100 PASS SVLEN=100;SVTYPE=DEL GT:CN 0/1:2\n', got '%s'", variant.String(1)) } } diff --git a/go.mod b/go.mod index 3b92aa6..4632fb3 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/nvnieuwk/bedgovcf -go 1.20 +go 1.21 require ( github.com/urfave/cli/v2 v2.25.7 diff --git a/test_data/big/qdnaseq.yaml b/test_data/big/qdnaseq.yaml index 2a9d97b..4458795 100644 --- a/test_data/big/qdnaseq.yaml +++ b/test_data/big/qdnaseq.yaml @@ -2,16 +2,16 @@ header: test: value: "test" chrom: - field: 0 + value: $0 prefix: chr pos: - field: 1 + value: $1 id: prefix: qdnaseq_ ref: value: N alt: - value: + value: ~if $4 < 0 options: - name: DEL description: Deletion @@ -26,15 +26,17 @@ filter: description: All filters passed info: cnv_ratio: - field: 4 + value: $4 end: - field: 2 + value: $2 svtype: value: CNV tool: value: qdnaseq + svlen: + value: ~min $2 $1 format: gt: value: 0/1 cn: - field: 4 + value: ~round $4