From 58f6cab04b0a27e2656aaf8c023889c5c1c0204b Mon Sep 17 00:00:00 2001 From: Dmitry Mozzherin <dmozzherin@gmail.com> Date: Thu, 7 Nov 2024 17:52:21 +0000 Subject: [PATCH] add nom. code option (close #265) To help with ambiguous cases, for example when it is not clear whether `Aus (Bus) cus` has a genus author `Bus` (bot.) or `Bus` is a subgenus of `Aus` (zool.). It also deprecates the cultivar flag, which becomes another option for the code flag. The cultivar flag will be kept for backward compatibility. --- ent/nomcode/nomcode.go | 52 ++++++++++++++++++++++++++++++++++++++++++ gnparser/cmd/root.go | 25 ++++++++++++++++---- 2 files changed, 73 insertions(+), 4 deletions(-) create mode 100644 ent/nomcode/nomcode.go diff --git a/ent/nomcode/nomcode.go b/ent/nomcode/nomcode.go new file mode 100644 index 0000000..79caf97 --- /dev/null +++ b/ent/nomcode/nomcode.go @@ -0,0 +1,52 @@ +package nomcode + +import ( + "log/slog" + "strings" +) + +// Code represents a nomenclatural code. +type Code int + +// Constants for different nomenclatural codes. +const ( + Unknown Code = iota // Unknown code + Zoological // Zoological code + Botanical // Botanical code + Cultivar // Cultivar code +) + +// New creates a new Code from a string representation. +// It accepts short codes ('b', 'z', 'c') and full names +// ('botanical', 'zoological', 'cultivar') as well as +// official abbreviations ('icn', 'iczn', 'icncp'). +// The input string is case-insensitive. +func New(s string) Code { + sOrig := s + s = strings.ToLower(s) + switch s { + case "b", "bot", "botanical", "icn": + return Botanical + case "z", "zoo", "zoological", "iczn": + return Zoological + case "c", "cult", "cultivar", "icncp": + return Cultivar + default: + slog.Warn("Cannot determine nomenclatural code", "input", sOrig) + return Unknown + } +} + +// String returns the official abbreviation of the nomenclatural code. 
+func (c Code) String() string { + switch c { + case Zoological: + return "ICZN" + case Botanical: + return "ICN" + case Cultivar: + return "ICNCP" + default: + return "" + } +} diff --git a/gnparser/cmd/root.go b/gnparser/cmd/root.go index 34d7974..e167a8c 100644 --- a/gnparser/cmd/root.go +++ b/gnparser/cmd/root.go @@ -139,7 +139,18 @@ func init() { "maximum number of names in a batch send for processing.") rootCmd.Flags().BoolP("cultivar", "C", false, - "include cultivar epithets and graft-chimeras in normalized and canonical outputs") + "parse according to cultivar code ICNCP (DEPRECATED, use nomenclatural-code instead)") + + codeHelp := `Modifies the parser's behavior in ambiguous cases, sometimes +introducing additional parsing rules. + +Accepted values are: + - 'bot', 'icn', 'botanical' for botanical code + - 'cult', 'icncp', 'cultivar' for cultivar code + - 'zoo', 'iczn', 'zoological' for zoological code + +If not set, the parser will attempt to determine the appropriate code/s.` + rootCmd.Flags().StringP("nomenclatural-code", "n", "", codeHelp) rootCmd.Flags().BoolP("capitalize", "c", false, "capitalize the first letter of input name-strings") @@ -149,9 +160,15 @@ func init() { rootCmd.Flags().BoolP("details", "d", false, "provides more details") - formatHelp := "sets output format. Can be one of:\n" + - "'csv', 'tsv', 'compact', 'pretty'\n" + - "default is 'csv'" + formatHelp := `Sets the output format. + +Accepted values are: + - 'csv': Comma-separated values + - 'tsv': Tab-separated values + - 'compact': Compact JSON format + - 'pretty': Human-readable JSON format + +If not set, the output format defaults to 'csv'.` rootCmd.Flags().StringP("format", "f", "", formatHelp) rootCmd.Flags().BoolP("ignore_tags", "i", false,