diff --git a/cmd/roy/data/DROID_SignatureFile_V114.xml b/cmd/roy/data/DROID_SignatureFile_V114.xml index 249614554..844e7f40e 100644 --- a/cmd/roy/data/DROID_SignatureFile_V114.xml +++ b/cmd/roy/data/DROID_SignatureFile_V114.xml @@ -1,4 +1,5 @@ + @@ -52744,7 +52745,7 @@ PUID="fmt/9" Version="5"/> - 123 @@ -52792,7 +52793,7 @@ 687 869 - 18 gif @@ -53005,7 +53006,7 @@ wav 2741 - 51 avi @@ -53060,7 +53061,7 @@ 54 wrl - 58 png @@ -53523,7 +53524,7 @@ 127 doc - 1032 wav @@ -58786,7 +58787,7 @@ 1364 exr - 1357 nrrd diff --git a/cmd/sf/pronom_test.go b/cmd/sf/pronom_test.go index b93d7d5f1..422e7de9c 100644 --- a/cmd/sf/pronom_test.go +++ b/cmd/sf/pronom_test.go @@ -29,7 +29,7 @@ type pronomIdentificationTests struct { var skeletons = make(map[string]*fstest.MapFile) -var minimalPronom = []string{"fmt/1", "fmt/3", "fmt/5", "fmt/11", "fmt/14"} +var minimalPronom = []string{"fmt/1", "fmt/3", "fmt/5", "fmt/11", "fmt/14", "fmt/1002"} // Populate the global skeletons map from string-based byte-sequences to // save having to store skeletons on disk and read from them. @@ -59,6 +59,7 @@ func makeSkeletons() { "") files["fmt-3-signature-id-18.gif"] = "4749463837613b" files["badf00d.unknown"] = "badf00d" + files["fmt-1002-signature-id-1357.nrrd"] = "4e52524430302e3031" for key, val := range files { data, _ := hex.DecodeString(val) skeletons[key] = &fstest.MapFile{Data: []byte(data)} @@ -125,20 +126,21 @@ var pronomIDs = []pronomIdentificationTests{ "Audio, Video", "extension match avi; byte match at 0, 12", "", + }, { + "pronom", + "fmt/1002", + "Nearly Raw Raster Data", + "1", + "", + "Image (Raster), Dataset", + "extension match nrrd; byte match at 0, 9", + "", }, } -// TestPronom looks to see if PRONOM identification results for a -// minimized PRONOM dataset are correct and contain the information we -// anticipate. 
-func TestPronom(t *testing.T) { - sf := siegfried.New() - config.SetHome(DataPath) - identifier, err := pronom.New(config.SetLimit(minimalPronom)) - if err != nil { - t.Errorf("Error creating new PRONOM identifier: %s", err) - } - sf.Add(identifier) +// runIdentificationWithSF runs the shared skeleton-file identification +// checks against the supplied Siegfried and reports mismatches through t. +func runIdentificationWithSF(sf *siegfried.Siegfried, t *testing.T) { makeSkeletons() skeletonFS := fstest.MapFS(skeletons) testDirListing, err := skeletonFS.ReadDir(".") @@ -182,5 +184,38 @@ func TestPronom(t *testing.T) { t.Errorf("Results not equal for %s; expected %v; got %v", res.puid, pronomIDs[idx], res) } } +} + +// TestPronom looks to see if PRONOM identification results for a +// minimized PRONOM dataset are correct and contain the information we +// anticipate. +func TestPronom(t *testing.T) { + sf := siegfried.New() + config.SetHome(DataPath) + identifier, err := pronom.New(config.SetLimit(minimalPronom)) + if err != nil { + t.Errorf("Error creating new PRONOM identifier: %s", err) + } + sf.Add(identifier) + runIdentificationWithSF(sf, t) + config.Clear()() +} + +// TestPronomNoReports performs the same tests as TestPronom, but +// against a Siegfried created purely from a signature file. 
+func TestPronomNoReports(t *testing.T) { + sf := siegfried.New() + config.SetHome(DataPath) + config.SetNoContainer()() + config.SetNoReports()() + if config.Reports() != "" { + t.Errorf("pronom.reports should be unset, not: %s", config.Reports()) + } + identifier, err := pronom.New(config.SetLimit(minimalPronom)) + if err != nil { + t.Errorf("Error creating new PRONOM identifier: %s", err) + } + sf.Add(identifier) + runIdentificationWithSF(sf, t) config.Clear()() } diff --git a/pkg/config/identifier.go b/pkg/config/identifier.go index 6b7a5e0af..09e072b2b 100644 --- a/pkg/config/identifier.go +++ b/pkg/config/identifier.go @@ -316,6 +316,7 @@ func IsArchive(id string) Archive { // Clear clears loc and mimeinfo details to avoid pollution when creating multiple identifiers in same session func Clear() func() private { return func() private { + identifier.noContainer = false identifier.name = "" identifier.extend = nil identifier.limit = nil @@ -323,6 +324,7 @@ func Clear() func() private { identifier.multi = Conclusive loc.fdd = "" mimeinfo.mi = "" + pronom.reports = "pronom" return private{} } } diff --git a/pkg/pronom/identifier.go b/pkg/pronom/identifier.go index 68ff8eea9..116b33591 100644 --- a/pkg/pronom/identifier.go +++ b/pkg/pronom/identifier.go @@ -119,7 +119,7 @@ func New(opts ...config.Option) (core.Identifier, error) { pronom = identifier.ApplyConfig(pronom) id := &Identifier{ Base: identifier.New(pronom, config.ZipPuid()), - hasClass: config.Reports() != "" && !config.NoClass(), + hasClass: !config.NoClass(), infos: infos(pronom.Infos()), } if id.Multi() == config.DROID { diff --git a/pkg/pronom/internal/mappings/droid.go b/pkg/pronom/internal/mappings/droid.go index c762b943d..e8dc8f050 100644 --- a/pkg/pronom/internal/mappings/droid.go +++ b/pkg/pronom/internal/mappings/droid.go @@ -59,6 +59,7 @@ type FileFormat struct { Name string `xml:",attr"` Version string `xml:",attr"` MIMEType string `xml:",attr"` + FormatType string `xml:",attr"` Extensions 
[]string `xml:"Extension"` Signatures []int `xml:"InternalSignatureID"` Priorities []int `xml:"HasPriorityOverFileFormatID"` diff --git a/pkg/pronom/parseable.go b/pkg/pronom/parseable.go index 0edeb404b..59d617f26 100644 --- a/pkg/pronom/parseable.go +++ b/pkg/pronom/parseable.go @@ -233,6 +233,7 @@ func (d *droid) Infos() map[string]identifier.FormatInfo { name: strings.TrimSpace(v.Name), version: strings.TrimSpace(v.Version), mimeType: strings.TrimSpace(v.MIMEType), + class: strings.TrimSpace(v.FormatType), } } return infos diff --git a/pkg/pronom/pronom_test.go b/pkg/pronom/pronom_test.go index 3f13047df..58911d43c 100644 --- a/pkg/pronom/pronom_test.go +++ b/pkg/pronom/pronom_test.go @@ -6,6 +6,7 @@ import ( "sort" "testing" + "github.com/richardlehane/siegfried/internal/identifier" "github.com/richardlehane/siegfried/pkg/config" ) @@ -21,15 +22,9 @@ func TestNew(t *testing.T) { } } -// TestFormatInfos inspects the values loaded into a PRONOM identifier -// from a minimal PRONOM dataset, i.e. fewer than loading all of PRONOM. -func TestFormatInfos(t *testing.T) { - config.SetHome(dataPath) - config.SetLimit(minimalPronom)() - i, err := NewPronom() - if err != nil { - t.Error(err) - } +// verifyIdentifier provides a number of tests that can be run against +// a PRONOM identifier. 
+func verifyIdentifier(i identifier.Parseable, t *testing.T) { const minReports int = 5 if len(i.Infos()) != minReports { t.Error("Unexpected number of reports for PRONOM minimal tests") @@ -84,7 +79,7 @@ func TestFormatInfos(t *testing.T) { sort.Strings(puids) sort.Strings(expectedPuids) if !reflect.DeepEqual(puids, expectedPuids) { - t.Error("PUIDs from minimal PRONOM set do not match expected values") + t.Errorf("PUIDs from minimal PRONOM set do not match expected values; expected %v; got %v", expectedPuids, puids) } sort.Strings(names) sort.Strings(expectedNames) @@ -94,17 +89,49 @@ func TestFormatInfos(t *testing.T) { sort.Strings(versions) sort.Strings(expectedVersions) if !reflect.DeepEqual(versions, expectedVersions) { - t.Error("Format versions from minimal PRONOM set do not match expected values") + t.Errorf("Format versions from minimal PRONOM set do not match expected values; expected %v; got %v", expectedVersions, versions) } sort.Strings(mimes) sort.Strings(expectedMimes) if !reflect.DeepEqual(mimes, expectedMimes) { - t.Error("MIMETypes from minimal PRONOM set do not match expected values") + t.Errorf("MIMETypes from minimal PRONOM set do not match expected values; expected %v; got %v", expectedMimes, mimes) } sort.Strings(types) sort.Strings(expectedTypes) if !reflect.DeepEqual(types, expectedTypes) { - t.Error("Format types from minimal PRONOM set do not match expected values") + t.Errorf("Format types from minimal PRONOM set do not match expected values; expected %v; got %v", expectedTypes, types) + } +} + +// TestFormatInfosDefault inspects the values loaded into a PRONOM +// identifier from a minimal PRONOM dataset, i.e. fewer than loading +// all of PRONOM. 
+func TestFormatInfosDefault(t *testing.T) { + config.SetHome(dataPath) + config.SetLimit(minimalPronom)() + i, err := NewPronom() + if err != nil { + t.Error(err) + } + verifyIdentifier(i, t) + config.Clear()() +} + +// TestFormatInfosNoReports performs the same tests as TestFormatInfosDefault +// but does so without loading PRONOM reports, preferring to create an +// identifier using a signature file only. +func TestFormatInfosNoReports(t *testing.T) { + config.SetHome(dataPath) + config.SetLimit(minimalPronom)() + config.SetNoContainer()() + config.SetNoReports()() + if config.Reports() != "" { + t.Errorf("pronom.reports should be unset, not: %s", config.Reports()) + } + i, err := NewPronom() + if err != nil { + t.Error(err) } + verifyIdentifier(i, t) config.Clear()() }