From 58f6cab04b0a27e2656aaf8c023889c5c1c0204b Mon Sep 17 00:00:00 2001
From: Dmitry Mozzherin <dmozzherin@gmail.com>
Date: Thu, 7 Nov 2024 17:52:21 +0000
Subject: [PATCH] add nom. code option (close #265)

The new option helps in ambiguous cases, for example when it
is not clear whether `Aus (Bus) cus` has a genus author `Bus`
(bot.) or whether `Bus` is a subgenus of `Aus` (zool.). It also
deprecates the cultivar flag, which becomes another value of
the code flag. The cultivar flag will be kept for backward
compatibility.
---
 ent/nomcode/nomcode.go | 52 ++++++++++++++++++++++++++++++++++++++++++
 gnparser/cmd/root.go   | 25 ++++++++++++++++----
 2 files changed, 73 insertions(+), 4 deletions(-)
 create mode 100644 ent/nomcode/nomcode.go

diff --git a/ent/nomcode/nomcode.go b/ent/nomcode/nomcode.go
new file mode 100644
index 0000000..79caf97
--- /dev/null
+++ b/ent/nomcode/nomcode.go
@@ -0,0 +1,52 @@
+package nomcode
+
+import (
+	"log/slog"
+	"strings"
+)
+
+// Code represents a nomenclatural code (for example zoological or botanical).
+type Code int
+
+// Nomenclatural codes known to this package.
+const (
+	Unknown    Code = iota // zero value: the code is not set or was not recognized
+	Zoological             // zoological code; String returns "ICZN"
+	Botanical              // botanical code; String returns "ICN"
+	Cultivar               // cultivar code; String returns "ICNCP"
+)
+
+// New converts a string into a Code. Matching is case-insensitive and
+// accepts short codes ('b', 'z', 'c'), abbreviated names ('bot', 'zoo',
+// 'cult'), full names ('botanical', 'zoological', 'cultivar') and official
+// abbreviations ('icn', 'iczn', 'icncp'). Any other input, including an
+// empty string, logs a warning via slog and returns Unknown.
+func New(s string) Code {
+	sOrig := s // keep the caller's original spelling for the warning below
+	s = strings.ToLower(s)
+	switch s {
+	case "b", "bot", "botanical", "icn":
+		return Botanical
+	case "z", "zoo", "zoological", "iczn":
+		return Zoological
+	case "c", "cult", "cultivar", "icncp":
+		return Cultivar
+	default:
+		slog.Warn("Cannot determine nomenclatural code", "input", sOrig)
+		return Unknown
+	}
+}
+
+// String returns the code's official abbreviation, or "" for Unknown and unlisted values.
+func (c Code) String() string {
+	switch c {
+	case Zoological:
+		return "ICZN"
+	case Botanical:
+		return "ICN"
+	case Cultivar:
+		return "ICNCP"
+	default:
+		return ""
+	}
+}
diff --git a/gnparser/cmd/root.go b/gnparser/cmd/root.go
index 34d7974..e167a8c 100644
--- a/gnparser/cmd/root.go
+++ b/gnparser/cmd/root.go
@@ -139,7 +139,18 @@ func init() {
 		"maximum number of names in a batch send for processing.")
 
 	rootCmd.Flags().BoolP("cultivar", "C", false,
-		"include cultivar epithets and graft-chimeras in normalized and canonical outputs")
+		"parse according to  cultivar code ICNCP (DEPRECATED, use nomenclatural-code instead)")
+
+	codeHelp := `Modifies the parser's behavior in ambiguous cases, sometimes 
+introducing additional parsing rules.
+
+Accepted values are:
+  - 'bot', 'icn', 'botanical' for botanical code
+  - 'cult', 'icncp', 'cultivar' for cultivar code
+  - 'zoo', 'iczn', 'zoological' for zoological code
+
+If not set, the parser will attempt to determine the appropriate code/s.`
+	rootCmd.Flags().StringP("nomenclatural-code", "n", "", codeHelp)
 
 	rootCmd.Flags().BoolP("capitalize", "c", false,
 		"capitalize the first letter of input name-strings")
@@ -149,9 +160,15 @@ func init() {
 
 	rootCmd.Flags().BoolP("details", "d", false, "provides more details")
 
-	formatHelp := "sets output format. Can be one of:\n" +
-		"'csv', 'tsv', 'compact', 'pretty'\n" +
-		"default is 'csv'"
+	formatHelp := `Sets the output format.
+
+Accepted values are:
+  - 'csv': Comma-separated values
+  - 'tsv': Tab-separated values
+  - 'compact': Compact JSON format
+  - 'pretty': Human-readable JSON format
+
+If not set, the output format defaults to 'csv'.`
 	rootCmd.Flags().StringP("format", "f", "", formatHelp)
 
 	rootCmd.Flags().BoolP("ignore_tags", "i", false,