diff --git a/CHANGELOG.md b/CHANGELOG.md index cf8eab7..90b8537 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## [v0.5.2] + +- Add [#58]: add all matches/all sources to web interface. + ## [v0.5.1] - Fix: command line and configuration options @@ -87,6 +91,12 @@ This document follows [changelog guidelines] +[v0.5.2]: https://github.com/gnames/gnverifier/compare/v0.5.1...v0.5.2 +[v0.5.1]: https://github.com/gnames/gnverifier/compare/v0.5.0...v0.5.1 +[v0.5.0]: https://github.com/gnames/gnverifier/compare/v0.4.1...v0.5.0 +[v0.4.1]: https://github.com/gnames/gnverifier/compare/v0.4.0...v0.4.1 +[v0.4.0]: https://github.com/gnames/gnverifier/compare/v0.3.3...v0.4.0 +[v0.3.3]: https://github.com/gnames/gnverifier/compare/v0.3.2...v0.3.3 [v0.3.2]: https://github.com/gnames/gnverifier/compare/v0.3.1...v0.3.2 [v0.3.1]: https://github.com/gnames/gnverifier/compare/v0.3.0...v0.3.1 [v0.3.0]: https://github.com/gnames/gnverifier/compare/v0.2.5...v0.3.0 @@ -98,6 +108,16 @@ This document follows [changelog guidelines] [v0.2.0]: https://github.com/gnames/gnverifier/compare/v0.1.0...v0.2.0 [v0.1.0]: https://github.com/gnames/gnverifier/tree/v0.1.0 +[#70]: https://github.com/gnames/gnverifier/issues/70 +[#69]: https://github.com/gnames/gnverifier/issues/69 +[#68]: https://github.com/gnames/gnverifier/issues/68 +[#67]: https://github.com/gnames/gnverifier/issues/67 +[#66]: https://github.com/gnames/gnverifier/issues/66 +[#65]: https://github.com/gnames/gnverifier/issues/65 +[#64]: https://github.com/gnames/gnverifier/issues/64 +[#63]: https://github.com/gnames/gnverifier/issues/63 +[#62]: https://github.com/gnames/gnverifier/issues/62 +[#61]: https://github.com/gnames/gnverifier/issues/61 [#60]: https://github.com/gnames/gnverifier/issues/60 [#59]: https://github.com/gnames/gnverifier/issues/59 [#58]: https://github.com/gnames/gnverifier/issues/58 diff --git a/gnverifier.go b/gnverifier.go index 1f9b9d8..2034b20 100644 --- a/gnverifier.go +++ 
b/gnverifier.go @@ -7,6 +7,7 @@ import ( "sync" "time" + "github.com/gnames/gnlib/ent/gnvers" vlib "github.com/gnames/gnlib/ent/verifier" "github.com/gnames/gnverifier/config" "github.com/gnames/gnverifier/ent/verifier" @@ -26,6 +27,11 @@ func New(cnf config.Config, vfr verifier.Verifier) GNverifier { } } +// GetVersion returns version and build of GNverifier +func (gnv gnverifier) GetVersion() gnvers.Version { + return gnvers.Version{Version: Version, Build: Build} +} + // DataSources returns meta-information about aggregated data-sources. func (gnv gnverifier) DataSources() ([]vlib.DataSource, error) { return gnv.verifier.DataSources(context.Background()) @@ -52,12 +58,7 @@ func (gnv gnverifier) Config() config.Config { // VerifyOne verifies one input string and returns results // as a string in JSON or CSV format. func (gnv gnverifier) VerifyOne(name string) (vlib.Verification, error) { - params := vlib.VerifyParams{ - NameStrings: []string{name}, - PreferredSources: gnv.config.PreferredSources, - WithAllMatches: gnv.config.WithAllMatches, - WithCapitalization: gnv.config.WithCapitalization, - } + params := gnv.setParams([]string{name}) verif := gnv.verifier.Verify(context.Background(), params) if len(verif) < 1 { return vlib.Verification{}, errors.New("no verification results") @@ -145,6 +146,7 @@ func (gnv gnverifier) setParams(names []string) vlib.VerifyParams { NameStrings: names, PreferredSources: gnv.config.PreferredSources, WithCapitalization: gnv.config.WithCapitalization, + WithAllMatches: gnv.config.WithAllMatches, } return res } diff --git a/interface.go b/interface.go index f91ddac..8b835b0 100644 --- a/interface.go +++ b/interface.go @@ -1,6 +1,7 @@ package gnverifier import ( + "github.com/gnames/gnlib/ent/gnvers" vlib "github.com/gnames/gnlib/ent/verifier" "github.com/gnames/gnverifier/config" ) @@ -32,4 +33,7 @@ type GNverifier interface { // DataSource uses ID input to return meta-information about a particular // data-source. 
DataSource(id int) (vlib.DataSource, error) + + // GetVersion returns version of the gnverifier + GetVersion() gnvers.Version } diff --git a/io/web/server.go b/io/web/server.go index 87246df..1a1aad8 100644 --- a/io/web/server.go +++ b/io/web/server.go @@ -24,6 +24,8 @@ type formInput struct { Names string `query:"names" form:"names"` Format string `query:"format" form:"format"` PreferredOnly string `query:"preferred_only" form:"preferred_only"` + AllSources string `query:"all_sources" form:"all_sources"` + AllMatches string `query:"all_matches" form:"all_matches"` Capitalize string `query:"capitalize" form:"capitalize"` DS []int `query:"ds" form:"ds"` } @@ -51,8 +53,8 @@ func Run(gnv gnverifier.GNverifier, port int) { e.POST("/", homePOST(gnv)) e.GET("/data_sources", dataSources(gnv)) e.GET("/data_sources/:id", dataSource(gnv)) - e.GET("/about", about()) - e.GET("/api", api()) + e.GET("/about", about(gnv)) + e.GET("/api", api(gnv)) fs := http.FileServer(http.FS(static)) e.GET("/static/*", echo.WrapHandler(fs)) @@ -71,21 +73,23 @@ type Data struct { Input string Format string Preferred []int + AllMatches bool Verified []vlib.Verification DataSources []vlib.DataSource DataSource vlib.DataSource + Version string } -func about() func(echo.Context) error { +func about(gnv gnverifier.GNverifier) func(echo.Context) error { return func(c echo.Context) error { - data := Data{Page: "about"} + data := Data{Page: "about", Version: gnv.GetVersion().Version} return c.Render(http.StatusOK, "layout", data) } } -func api() func(echo.Context) error { +func api(gnv gnverifier.GNverifier) func(echo.Context) error { return func(c echo.Context) error { - data := Data{Page: "api"} + data := Data{Page: "api", Version: gnv.GetVersion().Version} return c.Render(http.StatusOK, "layout", data) } } @@ -93,7 +97,7 @@ func api() func(echo.Context) error { func dataSources(gnv gnverifier.GNverifier) func(echo.Context) error { return func(c echo.Context) error { var err error - data := Data{Page: 
"data_sources"} + data := Data{Page: "data_sources", Version: gnv.GetVersion().Version} data.DataSources, err = gnv.DataSources() if err != nil { return err @@ -105,7 +109,7 @@ func dataSources(gnv gnverifier.GNverifier) func(echo.Context) error { func dataSource(gnv gnverifier.GNverifier) func(echo.Context) error { return func(c echo.Context) error { var err error - data := Data{Page: "data_source"} + data := Data{Page: "data_source", Version: gnv.GetVersion().Version} idStr := c.Param("id") id, err := strconv.Atoi(idStr) if err != nil { @@ -121,7 +125,7 @@ func dataSource(gnv gnverifier.GNverifier) func(echo.Context) error { func homeGET(gnv gnverifier.GNverifier) func(echo.Context) error { return func(c echo.Context) error { - data := Data{Page: "home", Format: "html"} + data := Data{Page: "home", Format: "html", Version: gnv.GetVersion().Version} inp := new(formInput) err := c.Bind(inp) @@ -140,7 +144,7 @@ func homeGET(gnv gnverifier.GNverifier) func(echo.Context) error { func homePOST(gnv gnverifier.GNverifier) func(echo.Context) error { return func(c echo.Context) error { inp := new(formInput) - data := Data{Page: "home", Format: "html"} + data := Data{Page: "home", Format: "html", Version: gnv.GetVersion().Version} err := c.Bind(inp) if err != nil { @@ -186,6 +190,8 @@ func redirectToHomeGET(c echo.Context, inp *formInput) error { q := make(url.Values) q.Set("names", inp.Names) q.Set("format", inp.Format) + q.Set("all_sources", inp.AllSources) + q.Set("all_matches", inp.AllMatches) if prefOnly { q.Set("preferred_only", inp.PreferredOnly) } @@ -208,9 +214,15 @@ func verificationResults( var names []string prefOnly := inp.PreferredOnly == "on" caps := inp.Capitalize == "on" + data.AllMatches = inp.AllMatches == "on" data.Input = inp.Names + data.Preferred = inp.DS + if inp.AllSources == "on" { + data.Preferred = []int{0} + } + format := inp.Format if format == "csv" || format == "json" || format == "tsv" { data.Format = format @@ -233,6 +245,7 @@ func 
verificationResults( opts := []config.Option{ config.OptPreferredSources(data.Preferred), config.OptWithCapitalization(caps), + config.OptWithAllMatches(data.AllMatches), } gnv = gnv.ChangeConfig(opts...) diff --git a/io/web/server_test.go b/io/web/server_test.go index daa17aa..dfe5466 100644 --- a/io/web/server_test.go +++ b/io/web/server_test.go @@ -33,7 +33,13 @@ func handlerGET(path string, t *testing.T) (echo.Context, *httptest.ResponseReco func TestAbout(t *testing.T) { c, rec := handlerGET("/about", t) - assert.Nil(t, about()(c)) + verifs := verifications(t) + cfg := config.New() + vfr := new(vtest.FakeVerifier) + vfr.VerifyReturns(verifs) + gnv := gnverifier.New(cfg, vfr) + + assert.Nil(t, about(gnv)(c)) assert.Equal(t, rec.Code, http.StatusOK) assert.Contains(t, rec.Body.String(), "Matching Process") } @@ -41,7 +47,13 @@ func TestAbout(t *testing.T) { func TestAPI(t *testing.T) { c, rec := handlerGET("/api", t) - assert.Nil(t, api()(c)) + verifs := verifications(t) + cfg := config.New() + vfr := new(vtest.FakeVerifier) + vfr.VerifyReturns(verifs) + gnv := gnverifier.New(cfg, vfr) + + assert.Nil(t, api(gnv)(c)) assert.Equal(t, rec.Code, http.StatusOK) assert.Contains(t, rec.Body.String(), "OpenAPI Schema") } diff --git a/io/web/templates.go b/io/web/templates.go index 5cc2c5e..26d0f7a 100644 --- a/io/web/templates.go +++ b/io/web/templates.go @@ -105,19 +105,19 @@ func addFuncs(tmpl *template.Template) { "matchType": func(mt vlib.MatchTypeValue, ed int) template.HTML { var res string clr := map[string]string{ - "green": "#080", - "yellow": "#a80", - "red": "#800", + "green": "green", + "yellow": "orange", + "red": "red", } switch mt { case vlib.Exact: - res = fmt.Sprintf("%s match by canonical form", clr["green"], mt) + res = fmt.Sprintf(" %s match by canonical form", clr["green"], mt) case vlib.NoMatch: - res = fmt.Sprintf("%s", clr["red"], mt) + res = fmt.Sprintf(" %s", clr["red"], mt) case vlib.Fuzzy, vlib.PartialFuzzy: - res = fmt.Sprintf("%s match, 
edit distance: %d", clr["yellow"], mt, ed) + res = fmt.Sprintf(" %s match, edit distance: %d", clr["yellow"], mt, ed) default: - res = fmt.Sprintf("%s match", clr["yellow"], mt) + res = fmt.Sprintf(" %s match", clr["yellow"], mt) } return template.HTML(res) }, diff --git a/io/web/templates/about.html b/io/web/templates/about.html index 78b87da..77d1b6c 100644 --- a/io/web/templates/about.html +++ b/io/web/templates/about.html @@ -5,152 +5,151 @@

Synopsis

- Scientific names are critical metadata elements in biodiversity. They are the scaffolding upon which all biological information hangs. - However, scientific names are imperfect identifiers. Some taxa share the same name (e.g. homonyms across - nomenclature codes) - and there can be many names for the same taxon. Names change because of taxonomic and nomenclatural revisions and they can be persistently misspelled in the literature. Optical scanning of printed material compounds the problem by introducing greater uncertainty in data integration. + Scientific names are critical metadata elements in biodiversity. They are the scaffolding upon which all biological information hangs. + However, scientific names are imperfect identifiers. Some taxa share the same name (e.g. homonyms across + nomenclature codes) + and there can be many names for the same taxon. Names change because of taxonomic and nomenclatural revisions and they can be persistently misspelled in the literature. Optical scanning of printed material compounds the problem by introducing greater uncertainty in data integration.

- This verification service tries to answer the following questions about a string representing a scientific name: -

+ This verification service tries to answer the following questions about a string representing a scientific name: +

Matching Process

1. Exact Matching

- Submitted names are parsed first and their canonical forms are checked - for exact matches against names in the entire verifier database. An algorithm - than sorts names according to scoring algorithm and returns the best match - back. + Submitted names are parsed first and their canonical forms are checked + for exact matches against names in the entire verifier database. An algorithm + then sorts names according to a scoring algorithm and returns the best match + back.

Canonical forms

- Name strings are often supplied with complex authorship information [e.g. - Racomitrium canescens f. epilosum (H. Müll. ex Milde) G. - Jones in Grout]. The Global Name parser strips authorship and rank - information from names [e.g. Racomitrium canescens epilosum], which - makes it possible to compare the string with other variants of the same name. - Resulting canonical forms are checked for exact matches against canonical - forms in specified data sources or in the entire resolver database. All found - names are removed from the process at the completion of this step. + Name strings are often supplied with complex authorship information [e.g. + Racomitrium canescens f. epilosum (H. Müll. ex Milde) G. + Jones in Grout]. The Global Name parser strips authorship and rank + information from names [e.g. Racomitrium canescens epilosum], which + makes it possible to compare the string with other variants of the same name. + Resulting canonical forms are checked for exact matches against canonical + forms in specified data sources or in the entire resolver database. All found + names are removed from the process at the completion of this step.

- The GNparser program - performs all the parsing steps + The GNparser program + performs all the parsing steps

2. Fuzzy Matching of Canonical Forms

- Mistakes, misspellings, or OCR errors can create incorrect variants of - scientific names. Remaining canonical forms generated from the previous step - are fuzzily matched against canonical forms in specified data sources. We use - a modified version of the TaxaMatch algorithm developed by Tony Rees. - After this step all found names are removed from the process. + Mistakes, misspellings, or OCR errors can create incorrect variants of + scientific names. Remaining canonical forms generated from the previous step + are fuzzily matched against canonical forms in specified data sources. + After this step all found names are removed from the process.

-

3. Partial Exact Matching of Names

+

3. Partial Exact Matching of Names

- Some infraspecific names do not match anything in the verification database. - Sometimes it happens because the name does not exist in the collected data. - Sometimes a 'junk' word is wrongly included and the parser may recognize it - as an infraspecific epithet. Sometimes an infraspecies are "promoted" to - species and the middle word disappears. The algorithm removes middle or - terminal words and tries to match resulting canonical forms. For example, - the last word "Pardosa moesta spider" would be ignored - given a match to "Pardosa moesta". + Some infraspecific names do not match anything in the verification database. + Sometimes it happens because the name does not exist in the collected data. + Sometimes a 'junk' word is wrongly included and the parser may recognize it + as an infraspecific epithet. Sometimes infraspecies are "promoted" to + species and the middle word disappears. The algorithm removes middle or + terminal words and tries to match resulting canonical forms. For example, + the last word "Pardosa moesta spider" would be ignored + given a match to "Pardosa moesta".

4. Fuzzy Partial Matching

- If exact partial matching failed, we try to make an aproximate match. + If exact partial matching failed, we try to make an approximate match.

5. Exact Matching of a Genus Part

- If anything else fails we try to match an apparent genus of the input. + If anything else fails we try to match an apparent genus of the input.

-

Scoring algorithm

+

Scoring algorithm

-

-More often than not, the verification returns more than one result back. In -some occations there might be thousands of matching names. We decided to -return only one "best" result, still giving a possibility to get data from -data-sources a user is interested in. The algorithm uses the following criteria -for sorting the results: -

+

+ More often than not, the verification returns more than one result back. In + some occasions there might be thousands of matching names. We decided to + return only one "best" result, still giving a possibility to get data from + data-sources a user is interested in. The algorithm uses the following criteria + for sorting the results: +

-

Infraspecific ranks

+

Infraspecific ranks

-

-Botanical nomenclatural - code allows a variety of ranks in the infraspecific names. The -algorithm favors results that contain the same rank as the input name. -

+

+ Botanical nomenclatural + code allows a variety of ranks in the infraspecific names. The + algorithm favors results that contain the same rank as the input name. +

-

Edit distance

+

Edit distance

-

-In cases when results are "fuzzy-matched", algorithm favors matches with the -smallest edit -distance determined according to Levenshtein -algorithm. -

+

+ In cases when results are "fuzzy-matched", algorithm favors matches with the + smallest edit + distance determined according to Levenshtein + algorithm. +

-

Data source curation

+

Data source curation

-

-Algorithm favors data-sources that are known for a significant curatorial -effort over ones that are not curated, or their curation effort is unknown. -

+

+ Algorithm favors data-sources that are known for a significant curatorial + effort over ones that are not curated, or their curation effort is unknown. +

-

Authorship

+

Authorship

-

-For inputs that contain authorship, algorithm favors matches that contain -the same, or similar authorship. -

+

+ For inputs that contain authorship, algorithm favors matches that contain + the same, or similar authorship. +

-

Current acceptance of a name

+

Current acceptance of a name

-

-A result is favored over other results, if it is a currently accepted name, -and not some kind of a synonym or a misspelling. -

+

+ A result is favored over other results, if it is a currently accepted name, + and not some kind of a synonym or a misspelling. +

-

Parsing quality

+

Parsing quality

-

-GNparser returns a parsing -quality value after extraction of a canonical form. The algorithm favors -high quality parsing over lower quality. -

+

+ GNparser returns a parsing + quality value after extraction of a canonical form. The algorithm favors + high quality parsing over lower quality. +

-

Preferred data sources

+

Preferred data sources

-

-Sometimes a user is more interested to get results from a particular -data-source, and less interested in a "best result". For such cases there is -an option to always return data from such a data-source. +

+ Sometimes a user is more interested to get results from a particular + data-source, and less interested in a "best result". For such cases there is + an option to always return data from such a data-source. -It is also possible to completely ignore "best result". It might be useful when -a user tries to map their checklist to a particular data-source. -

+ It is also possible to completely ignore "best result". It might be useful when + a user tries to map their checklist to a particular data-source. +

+
{{ end }} diff --git a/io/web/templates/api.html b/io/web/templates/api.html index f11f0c6..d0024df 100644 --- a/io/web/templates/api.html +++ b/io/web/templates/api.html @@ -2,49 +2,52 @@

Application Programming Interface (API)

- -

Web-based verification service includes a RESTful interface.

- -

GET

- -

-Append a vertical line separated array of strings to your domain url. -Make sure that '&' in the names are escaped as '%26', -and spaces are escaped as '+'. -

- -

-/api/v1/verifications/Aus+bus|Aus+bus+D.+%26+M.,+1870 -

- -

POST

- -

/api/v1/verifications

- -

-with request body in JSON format: -

- -
-  {
-    "nameStrings": [
-    "Pomatomus soltator",
-    "Bubo bubo (Linnaeus, 1758)"
-    ],
-    "preferredSources": [
-      1,
-      12,
-      169
-    ]
-  }
-
- -

OpenAPI Schema

-

-Read the GNames's - - Verification OpenAPI documentation - to learn about all options and the output schema. -

+
+

Web-based verification service includes a RESTful interface.

+ +

GET

+ +

+ Append a vertical line separated array of strings to your domain url. + Make sure that '&' in the names are escaped as '%26', + and spaces are escaped as '+'. +

+ +
+    /api/v1/verifications/Aus+bus|Aus+bus+D.+%26+M.,+1870
+  
+ +

POST

+ +

/api/v1/verifications

+ +

+ with request body in JSON format according to + + specification + : +

+ +
+    {
+      "nameStrings": [
+        "Pomatomus soltator",
+        "Bubo bubo (Linnaeus, 1758)"
+      ],
+      "preferredSources": [
+        1,
+        12,
+        169
+      ]
+    }
+  
+ +

OpenAPI Schema

+

+ Read the GNames's + + Verification OpenAPI documentation + to learn about all options and the output schema. +

{{ end }} diff --git a/io/web/templates/home.html b/io/web/templates/home.html index f563a57..294f4b9 100644 --- a/io/web/templates/home.html +++ b/io/web/templates/home.html @@ -1,7 +1,7 @@ {{ define "home" }} - {{ if .Verified }} +{{ if .Verified }}

Results

- {{ range .Verified }} +{{ range .Verified }}
@@ -14,7 +14,7 @@

{{ .Input }}

{{ $best := .BestResult }} {{ if $best }} - {{ template "results" $best}} + {{ template "results" $best}} {{ end }} {{ $pref := .PreferredResults }} @@ -24,41 +24,60 @@
From preferred Data Sources:
{{ end }} {{ range $pref }} - {{ template "results" . }} + {{ template "results" . }} {{ end }}
- {{ end }} - {{ else }} -

Verify a list of scientific names against biodiversity data-sources. This service parses incoming names, executes exact or fuzzy matching as required, and returns the best-scored result. It can also return matches from data-sources selected by a user.

+{{ end }} +{{ else }} +

Verify a list of scientific names against + biodiversity data-sources. + This service parses incoming names, executes exact or + fuzzy matching as required, and returns the best-scored result. + Optionally, it can also return matches from data-sources selected by a user. +

+

Paste Scientific Names, one per line (up to 5,000 names)

- +
+ + +     + + +
-
- Output - - - - - - - - -

- - - - -
-
Preferred Data Sources + + + + + + +
+ + + + + + + + +
+ +
+
@@ -140,5 +159,5 @@
From preferred Data Sources:
- {{ end }} +{{ end }} {{ end }} diff --git a/io/web/templates/layout.html b/io/web/templates/layout.html index aab1ce3..a85ea4a 100644 --- a/io/web/templates/layout.html +++ b/io/web/templates/layout.html @@ -67,8 +67,9 @@

Global Names Verifier

{{ end }}

- Code - on GitHub + + Version {{ .Version }} +

diff --git a/io/web/templates/results.html b/io/web/templates/results.html index e10794d..e015176 100644 --- a/io/web/templates/results.html +++ b/io/web/templates/results.html @@ -1,33 +1,33 @@ {{ define "results" }} -
+
-
- {{ if .Outlink }} - {{ .MatchedName }} - {{ else }} - {{ .MatchedName }} - {{ end }} - [ {{ matchType .MatchType .EditDistance }} ] -
+
+ {{ if .Outlink }} + 🌍 {{ .MatchedName }} + {{ else }} + {{ .MatchedName }} + {{ end }} + [ {{ matchType .MatchType .EditDistance }} ] +
- {{ if .CurrentName }} - {{ if ne .MatchedName .CurrentName }} -
- Current name: - {{ .CurrentName }} -
- {{ end }} - {{ end }} + {{ if .CurrentName }} + {{ if ne .MatchedName .CurrentName }} +
+ Current name: + {{ .CurrentName }} +
+ {{ end }} + {{ end }} - + - {{ if .ClassificationPath }} -
{{ classification .ClassificationPath .ClassificationRanks }}
- {{ end }} + {{ if .ClassificationPath }} +
{{ classification .ClassificationPath .ClassificationRanks }}
+ {{ end }} -
+
{{ end }} diff --git a/version.go b/version.go index 0e58a10..e8bce88 100644 --- a/version.go +++ b/version.go @@ -2,7 +2,7 @@ package gnverifier var ( // Version of the gnverifier - Version = "v0.5.1+" + Version = "v0.5.2+" // Build timestamp Build = "n/a" )