From 48051e50242ededce01c56881dcecb79f1840623 Mon Sep 17 00:00:00 2001 From: Kristina Spurgin Date: Wed, 24 Jan 2024 15:24:16 -0500 Subject: [PATCH 1/9] Puts each string to STDOUT so you can see progress --- utils/translate_to_cspace_csv.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/translate_to_cspace_csv.rb b/utils/translate_to_cspace_csv.rb index f4738f89..42b360c9 100644 --- a/utils/translate_to_cspace_csv.rb +++ b/utils/translate_to_cspace_csv.rb @@ -82,6 +82,7 @@ def create_row(translated, translation, idx) CSV.open(outfile, "a") do |csvout| Emendate.batch_translate(strings, optargs) do |translation| translation.values.each_with_index do |translated, idx| + puts translation.orig row = create_row(translated, translation, idx) csvout << row.values_at(*HEADERS) end From c2865c9a2fe4a1c752c3506dc37d397f758d988e Mon Sep 17 00:00:00 2001 From: Kristina Spurgin Date: Wed, 24 Jan 2024 15:24:54 -0500 Subject: [PATCH 2/9] Add examples --- spec/support/examples.csv | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/spec/support/examples.csv b/spec/support/examples.csv index 1af54308..bbddc7fa 100644 --- a/spec/support/examples.csv +++ b/spec/support/examples.csv @@ -45,6 +45,7 @@ "circa 2002?","circa 2002?","circa ####?","approximate;uncertain;four_digit_year;year_granularity","ncm",,"0","2002","2002","2002-01-01","2002-12-31","approximate;uncertain",,"na","2002%","2002 (uncertain and approximate)" "circa2002","circa2002","circa####","approximate;four_digit_year;year_granularity","ncm",,"0","2002","2002","2002-01-01","2002-12-31","approximate",,"na","2002~","2002 (approximate)" "Possibly circa 1955-1956","Possibly circa 1955-1956","possibly circa ####-####","year_granularity;approximate;uncertain;range","wpl",,"0","1955","1956","1955-01-01","1956-12-31","approximate;uncertain",,"na",, +"c.55","c.55","c.##","year_granularity;approximate;two_digit_year","ccp",,"0","1955","1955","1955-01-01","1955-12-31","approximate, whole",,"na",, "probably 2002","probably 2002","probably ####","year_granularity;uncertain","wpl",,"0","2002","2002","2002-01-01","2002-12-31","uncertain",,"na","2002?","2002 (uncertain)" "probably March 2020","probably March 2020","probably MONTH ####","year_month_granularity;uncertain","wpl",,"0","2020-03","2020-03","2020-03-01","2020-03-31","uncertain",,"na","2020-03?","2020-03 (uncertain)" "1920 ca","1920 ca","#### ca","approximate","mmm",,"0","1920","1920","1920-01-01","1920-12-31","approximate",,"na","1920~","1920 (approximate)" @@ -310,3 +311,6 @@ "Y171010000S3","Y171010000S3","y#########s#","currently_unparseable;edtf;edtf2;letter_prefixed_year;significant_digits",,,"0","nilValue","nilValue","nilValue","nilValue",,,"Untokenizable sequences: 171010000",, "Y3388E2S3","Y3388E2S3","y####e#s#","currently_unparseable;edtf;edtf2;exponential_year;letter_prefixed_year;significant_digits",,,"0","nilValue","nilValue","nilValue","nilValue",,,"Unprocessable string",, "1947/ca. 1965","1947/ca. 1965","####/ca. ####","approximate;year_granularity;range","ccp",,"0","1947","1965","1947-01-01","1965-12-31","approximate, end",,"na",, +"1815 or later","1815 or later","#### or later","or_after;year_granularity;after_before","ccp","open_unknown_end_date: '2022-06-01'","0","1815","2022","1815-01-01","2022-06-01",,,,"[1815..]","1815 or a later date" +"before 11/1750","before 11/1750","before ##/####","after_before;year_month_granularity","ccp","open_unknown_start_date: '1600-02-15'","0","1600-02","1749-10","1600-02-15","1749-10-31",,,"na","[..1750-11]","1749-11 or an earlier date" +"Sept. 28, 1969","Sept. 28, 1969","MON. ##, ####","year_month_day_granularity","ccp",,"0","1969-09-28","1969-09-28","1969-09-28","1969-09-28",,,,"1969-09-28","1969-09-28" From e14805ea19efb6727d6792e30da1dcab71acd6ce Mon Sep 17 00:00:00 2001 From: Kristina Spurgin Date: Wed, 24 Jan 2024 15:36:33 -0500 Subject: [PATCH 3/9] Update pry --- Gemfile.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index 0503d9f4..29ea2e54 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -86,7 +86,7 @@ GEM parser (3.3.0.3) ast (~> 2.4.1) racc - pry (0.14.1) + pry (0.14.2) coderay (~> 1.1) method_source (~> 1.0) racc (1.7.3) From 8c0e30507c9be021e283b079ebc5088a5b05a44a Mon Sep 17 00:00:00 2001 From: Kristina Spurgin Date: Wed, 24 Jan 2024 15:36:45 -0500 Subject: [PATCH 4/9] Ensure Sep., Sept., and September are supported --- lib/emendate/lexer.rb | 7 +++---- spec/emendate/lexer_spec.rb | 4 ++++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/emendate/lexer.rb b/lib/emendate/lexer.rb index f48ba48b..781f4c6d 100644 --- a/lib/emendate/lexer.rb +++ b/lib/emendate/lexer.rb @@ -46,10 +46,9 @@ def call(...) .join("|") + ")") months = "^(" + ([ Date::MONTHNAMES.compact, - Date::ABBR_MONTHNAMES.compact.map { |val| val + '\.?' }, - 'Sept\.?' - ].flatten - .join("|") + ")") + 'Sept\.?', + Date::ABBR_MONTHNAMES.compact.map { |val| val + '\.?' } + ].flatten.join("|") + ")") ordinals = "^(" + ORDINAL_INDICATORS.join("|") + ")" ALPHA = { diff --git a/spec/emendate/lexer_spec.rb b/spec/emendate/lexer_spec.rb index 982e6e6c..a9fa9092 100644 --- a/spec/emendate/lexer_spec.rb +++ b/spec/emendate/lexer_spec.rb @@ -16,8 +16,12 @@ "@" => [:unknown], "Sep. 1" => %i[month space number1or2], "cat" => [:unknown], + "Sept. 19, 1918" => %i[month space number1or2 comma space + number4], "Sep. 19, 1918" => %i[month space number1or2 comma space number4], + "September 19, 1918" => %i[month space number1or2 comma space + number4], "{..1984" => %i[curly_bracket_open double_dot number4], "{...1984" => %i[curly_bracket_open unknown number4], "- –" => %i[hyphen space hyphen], From ab7a8b719c0cd27338e82db574a26d6c69437625 Mon Sep 17 00:00:00 2001 From: Kristina Spurgin Date: Wed, 24 Jan 2024 15:43:38 -0500 Subject: [PATCH 5/9] Support pattern: c.## --- lib/emendate/date_part_tagger.rb | 5 +++++ spec/emendate/date_part_tagger_spec.rb | 10 ++++++++++ spec/emendate/date_segmenter_spec.rb | 9 +++++++++ 3 files changed, 24 insertions(+) diff --git a/lib/emendate/date_part_tagger.rb b/lib/emendate/date_part_tagger.rb index 46f5ccca..31b7faaf 100644 --- a/lib/emendate/date_part_tagger.rb +++ b/lib/emendate/date_part_tagger.rb @@ -64,6 +64,11 @@ def full_match_tagger proc { tag_numeric_month } when /^year number1or2$/ proc { tag_year_plus_numeric_month_season_or_year } + when /^number1or2$/ + proc do + year = Emendate::ShortYearHandler.call(result[0]) + result.replace_x_with_new(x: result[0], new: year) + end end end diff --git a/spec/emendate/date_part_tagger_spec.rb b/spec/emendate/date_part_tagger_spec.rb index 1f4d899e..a2b00654 100644 --- a/spec/emendate/date_part_tagger_spec.rb +++ b/spec/emendate/date_part_tagger_spec.rb @@ -27,6 +27,16 @@ end end + context "with c.##" do + let(:string) { "c.55" } + + it "segments as expected" do + expect(types).to eq(%i[year]) + expect(result[0].literal).to eq(1955) + expect(result.lexeme).to eq(string) + end + end + context "with ####.#" do let(:string) { "2020.0" } diff --git a/spec/emendate/date_segmenter_spec.rb b/spec/emendate/date_segmenter_spec.rb index 8d2bcf7a..7be5ab0e 100644 --- a/spec/emendate/date_segmenter_spec.rb +++ b/spec/emendate/date_segmenter_spec.rb @@ -27,6 +27,15 @@ end end + context "with c.##" do + let(:string) { "c.55" } + + it "segments as expected" do + expect(types).to eq(%i[year_date_type]) + expect(result.lexeme).to eq(string) + end + end + context "with ####, ####" do let(:string) { "1997, 2000" } From 2ae8b3f09465f438811a3f344c5a3ccdd65de6ed Mon Sep 17 00:00:00 2001 From: Kristina Spurgin Date: Wed, 24 Jan 2024 16:42:05 -0500 Subject: [PATCH 6/9] Support pattern: #### or later --- lib/emendate/format_standardizer.rb | 10 ++++++++++ lib/emendate/lexer.rb | 2 +- spec/emendate/format_standardizer_spec.rb | 10 ++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/lib/emendate/format_standardizer.rb b/lib/emendate/format_standardizer.rb index 1670f541..6d61c371 100644 --- a/lib/emendate/format_standardizer.rb +++ b/lib/emendate/format_standardizer.rb @@ -117,6 +117,16 @@ def partial_match_standardizers end when /.*number4 hyphen number4 era_bce.*/ proc { copy_era_after_first_year } + when /.*or after$/ + proc do + oraft = result.extract(%i[or after]).segments + result.replace_x_with_derived_new_type( + x: oraft[0], type: :range_indicator + ) + result.replace_x_with_derived_new_type( + x: oraft[1], type: :unknown_date + ) + end end end diff --git a/lib/emendate/lexer.rb b/lib/emendate/lexer.rb index 781f4c6d..f6a1af9e 100644 --- a/lib/emendate/lexer.rb +++ b/lib/emendate/lexer.rb @@ -52,7 +52,7 @@ def call(...) ordinals = "^(" + ORDINAL_INDICATORS.join("|") + ")" ALPHA = { - /^(after|post)/i => :after, + /^(after|later|post)/i => :after, /^(&|and)/i => :and, /^(about|around|approximate(ly|)|ca\.?|circa|estimated?|est\.?)/i => :approximate, diff --git a/spec/emendate/format_standardizer_spec.rb b/spec/emendate/format_standardizer_spec.rb index ec5368fa..de3b17bb 100644 --- a/spec/emendate/format_standardizer_spec.rb +++ b/spec/emendate/format_standardizer_spec.rb @@ -245,6 +245,16 @@ end end + context "with #### or later" do + let(:string) { "1815 or later" } + + it "segments as expected" do + expect(subject.lexeme).to eq(string) + expect(result).to eq(%i[number4 range_indicator + rangedateendunknown_date_type]) + end + end + context "with MONTH ##-##, #### and MONTH ##, ####" do let(:string) { "July 13-15, 1997 and September 17, 1997" } From e0b592e75ddca2e221b8d27a822a371041bf0afd Mon Sep 17 00:00:00 2001 From: Kristina Spurgin Date: Wed, 24 Jan 2024 18:54:54 -0500 Subject: [PATCH 7/9] Handle forbidden modifier addition correctly --- lib/emendate/date_segmenter.rb | 25 ++++++++----------------- lib/emendate/date_types/datetypeable.rb | 4 ++-- lib/emendate/errors.rb | 2 +- spec/emendate/date_segmenter_spec.rb | 11 +++++++++++ 4 files changed, 22 insertions(+), 20 deletions(-) diff --git a/lib/emendate/date_segmenter.rb b/lib/emendate/date_segmenter.rb index 23fc41d8..85f90fc2 100644 --- a/lib/emendate/date_segmenter.rb +++ b/lib/emendate/date_segmenter.rb @@ -34,7 +34,9 @@ def call else transform_separators(separators) end - + rescue Emendate::ForbiddenSegmentAdditionError => e + Failure(e.to_s) + else Success(result) end @@ -113,26 +115,15 @@ def apply_modifier(type, direction) modifier = working[1] end - addable = datetype.addable?(type) - case direction when :forward - if addable - datetype.prepend_source_token(modifier) - else - add_as_unprocessable(modifier) - end + datetype.prepend_source_token(modifier) result << datetype working.shift(2) when :backward - if addable - datetype.append_source_token(modifier) - result << datetype - working.shift(2) - else - result << datetype - working.shift - end + datetype.append_source_token(modifier) + result << datetype + working.shift(2) end apply_modifiers(type) @@ -140,7 +131,7 @@ def apply_modifier(type, direction) def add_as_unprocessable(modifier) result << Emendate::Segment.new( - type: "unprocessable_#{modifier_type}", sources: [modifier] + type: "forbidden_#{modifier.type}", sources: [modifier] ) end diff --git a/lib/emendate/date_types/datetypeable.rb b/lib/emendate/date_types/datetypeable.rb index b4642514..5f14d134 100644 --- a/lib/emendate/date_types/datetypeable.rb +++ b/lib/emendate/date_types/datetypeable.rb @@ -42,7 +42,7 @@ module Datetypeable # @todo Rename to :prepend_source_segment def prepend_source_token(segment) unless addable?(segment.type) - raise Emendate::DisallowedTokenAdditionError.new( + fail Emendate::ForbiddenSegmentAdditionError.new( segment, __method__, self.class ) end @@ -57,7 +57,7 @@ def prepend_source_token(segment) # @param token [{Segment}] or subclasses of {Segment} def append_source_token(token) unless addable_token_types.include?(token.type) - raise Emendate::DisallowedTokenAdditionError.new( + fail Emendate::ForbiddenSegmentAdditionError.new( token, __method__, self.class ) end diff --git a/lib/emendate/errors.rb b/lib/emendate/errors.rb index 75aac45e..b302b2aa 100644 --- a/lib/emendate/errors.rb +++ b/lib/emendate/errors.rb @@ -28,7 +28,7 @@ def initialize(sources, message) end end - class DisallowedTokenAdditionError < StandardError + class ForbiddenSegmentAdditionError < StandardError include Emendate::Error def initialize(token, meth, klass) diff --git a/spec/emendate/date_segmenter_spec.rb b/spec/emendate/date_segmenter_spec.rb index 7be5ab0e..1343a9fa 100644 --- a/spec/emendate/date_segmenter_spec.rb +++ b/spec/emendate/date_segmenter_spec.rb @@ -18,6 +18,17 @@ end end + context "with early MONTH ##, ####" do + let(:string) { "early April 13, 1987" } + + it "fails" do + expect(subject.failure).to eq( + "Cannot prepend :partial segment to "\ + "Emendate::DateTypes::YearMonthDay sources" + ) + end + end + context "with ####, MON ##" do let(:string) { "2020, Feb 15" } From 8de3ac584f45be717c05570f2a7c2cb63a5ad86e Mon Sep 17 00:00:00 2001 From: Kristina Spurgin Date: Wed, 24 Jan 2024 19:32:57 -0500 Subject: [PATCH 8/9] Refactor date type earliest/latest to datetypable --- lib/emendate/date_types/century.rb | 38 ++++----- lib/emendate/date_types/datetypeable.rb | 50 +++++++++++- lib/emendate/date_types/decade.rb | 43 ++++------ lib/emendate/date_types/millennium.rb | 40 +++++++--- lib/emendate/date_types/range.rb | 6 -- lib/emendate/date_types/year.rb | 78 ++----------------- lib/emendate/date_types/year_month.rb | 32 ++++++-- lib/emendate/date_types/year_month_day.rb | 15 ++-- lib/emendate/date_types/year_season.rb | 30 +++---- .../date_types/year_month_day_spec.rb | 43 ++++++++-- spec/emendate/date_types/year_month_spec.rb | 60 ++++++++++++-- spec/emendate/date_types/year_spec.rb | 12 +-- 12 files changed, 260 insertions(+), 187 deletions(-) diff --git a/lib/emendate/date_types/century.rb b/lib/emendate/date_types/century.rb index 78966a59..7ed43548 100644 --- a/lib/emendate/date_types/century.rb +++ b/lib/emendate/date_types/century.rb @@ -27,12 +27,6 @@ def initialize(sources:) @granularity_level = :year end - # @return [Date] - def earliest = Date.new(earliest_year, 1, 1) - - # @return [Date] - def latest = Date.new(latest_year, 12, 31) - # @return [true] def range? = true @@ -67,37 +61,39 @@ def get_set_type end end - def earliest_year - year = start_year - case partial_indicator + def earliest_detail + year = case partial_indicator when nil - year + start_year when :early - year + start_year when :mid - year + 33 + start_year + 33 when :late - year + 66 + start_year + 66 end + Date.new(year, 1, 1) end - def latest_year - year = start_year - case partial_indicator + def latest_detail + year = case partial_indicator when nil - year + 99 + start_year + 99 when :early - year + 33 + start_year + 33 when :mid - year + 66 + start_year + 66 when :late - year + 99 + start_year + 99 end + Date.new(year, -1, -1) end def start_year base = (literal.to_s + "00").to_i - (century_type == :name) ? base + 1 : base + return base unless century_type == :name + + base + 1 end end end diff --git a/lib/emendate/date_types/datetypeable.rb b/lib/emendate/date_types/datetypeable.rb index 5f14d134..3837bd21 100644 --- a/lib/emendate/date_types/datetypeable.rb +++ b/lib/emendate/date_types/datetypeable.rb @@ -11,9 +11,19 @@ module DateTypes # Classes including this module should define the following instance # methods: # - # * earliest (Date) - # * latest (Date) - # * range? (Boolean) + # * range? (Boolean, public) + # * addable_token_types(override, Array, private) + # + # For date types that return partial or before/after (range switch) date + # values, the following must be defined in order for the default, shared + # :earliest and :latest methods to work: + # + # * granularity_level (Symbol, public, options: :year, :year_month, + # :year_season, :year_month_day) + # * earliest_detail (Date, private) + # * latest_detail (Date, private) + # + # See {Year} for an fully implemented example. # # Validatable date types run specified checks on initialization and # raise a {Emendate::DateTypeCreationError} if any checks fail. Validatable @@ -114,6 +124,30 @@ def lexeme sources.empty? ? "" : sources.lexeme end + # @return [Date] + def earliest + return earliest_detail unless range_switch + + case range_switch + when :before + earliest_for_before + when :after + latest_detail.next + end + end + + # @return [Date] + def latest + return latest_detail unless range_switch + + case range_switch + when :before + earliest_detail.prev_day + when :after + Date.today + end + end + # @return [String] representation of earliest year def earliest_at_granularity = at_granularity(:earliest) @@ -229,6 +263,15 @@ def has_one_part_of_type(type) end end + # @return [Date] + def earliest_for_before + if Emendate.options.before_date_treatment == :point + latest + else + Emendate.options.open_unknown_start_date + end + end + def at_granularity(point) gl = get_granularity_level(point) return unless gl @@ -240,6 +283,7 @@ def at_granularity(point) when :year_month "#{full.year}-#{full.month.to_s.rjust(2, "0")}" when :year_season + "#{full.year}-#{full.month.to_s.rjust(2, "0")}" when :year_month_day "#{full.year}-#{full.month.to_s.rjust(2, "0")}-"\ "#{full.day.to_s.rjust(2, "0")}" diff --git a/lib/emendate/date_types/decade.rb b/lib/emendate/date_types/decade.rb index 1aa33f7e..d30f402a 100644 --- a/lib/emendate/date_types/decade.rb +++ b/lib/emendate/date_types/decade.rb @@ -25,17 +25,8 @@ def initialize(sources:) @granularity_level = :year end - def earliest - Date.new(earliest_year, 1, 1) - end - - def latest - Date.new(latest_year, 12, 31) - end - - def range? - true - end + # @return [true] + def range? = true private @@ -60,36 +51,36 @@ def set_literal end end - def decade_earliest_year + def start_year (literal.to_s + "0").to_i end - def earliest_year - year = decade_earliest_year - case partial_indicator + def earliest_detail + year = case partial_indicator when nil - year + start_year when :early - year + start_year when :mid - year + 4 + start_year + 4 when :late - year + 7 + start_year + 7 end + Date.new(year, 1, 1) end - def latest_year - year = decade_earliest_year - case partial_indicator + def latest_detail + year = case partial_indicator when nil - year + 9 + start_year + 9 when :early - year + 3 + start_year + 3 when :mid - year + 6 + start_year + 6 when :late - year + 9 + start_year + 9 end + Date.new(year, -1, -1) end end end diff --git a/lib/emendate/date_types/millennium.rb b/lib/emendate/date_types/millennium.rb index aa974f93..35f76d0b 100644 --- a/lib/emendate/date_types/millennium.rb +++ b/lib/emendate/date_types/millennium.rb @@ -23,16 +23,6 @@ def initialize(sources:) @granularity_level = :year end - def earliest - yr = "#{literal}000".to_i - Date.new(yr, 1, 1) - end - - def latest - yr = "#{literal}999".to_i - Date.new(yr, 12, 31) - end - def range? true end @@ -60,6 +50,36 @@ def set_literal datepart.literal end end + + def earliest_detail + year = case partial_indicator + when nil + start_year + when :early + start_year + when :mid + start_year + 333 + when :late + start_year + 666 + end + Date.new(year, 1, 1) + end + + def latest_detail + year = case partial_indicator + when nil + start_year + 999 + when :early + start_year + 333 + when :mid + start_year + 666 + when :late + start_year + 999 + end + Date.new(year, -1, -1) + end + + def start_year = (literal.to_s + "000").to_i end end end diff --git a/lib/emendate/date_types/range.rb b/lib/emendate/date_types/range.rb index d8134d7d..d56d490b 100644 --- a/lib/emendate/date_types/range.rb +++ b/lib/emendate/date_types/range.rb @@ -41,12 +41,6 @@ def earliest = startdate.earliest # @return [Date] def latest = enddate.latest - # @return [Date] - def earliest = startdate.earliest - - # @return [Date] - def latest = enddate.latest - # @return [true] def range? = true diff --git a/lib/emendate/date_types/year.rb b/lib/emendate/date_types/year.rb index ca315d1e..0baf5752 100644 --- a/lib/emendate/date_types/year.rb +++ b/lib/emendate/date_types/year.rb @@ -7,11 +7,15 @@ module DateTypes class Year include Datetypeable + # @return [:year] + attr_reader :granularity_level + # @param sources [SegmentSet, Array] Segments # included in the date type def initialize(sources:) common_setup(binding) @orig_literal = first_numeric_literal + @granularity_level = :year end # @return [Integer] @@ -21,9 +25,6 @@ def literal adjusted_literal * -1 end - # @return [:year] - def granularity_level = :year - # @return [true] def qualifiable? = true @@ -39,54 +40,12 @@ def range? true if partial_indicator || range_switch end - # @return [Date] - def earliest - return earliest_by_partial unless range_switch - - case range_switch - when :before - earliest_for_before - when :after - latest_by_partial.next - end - end - - # @return [String] - def earliest_at_granularity - return year_string unless range_switch - - case range_switch - when :before - year_string(earliest.year) - end - end - - # @return [Date] - def latest - return latest_by_partial unless range_switch - - case range_switch - when :before - earliest_by_partial.prev_day - when :after - Date.today - end - end - - # @return [String] - def latest_at_granularity - return year_string unless range_switch - - case range_switch - when :before - year_string(latest.year) - end - end - private attr_reader :orig_literal + def addable_token_types = %i[partial before after era_bce] + def validate parts = sources.date_part_types if parts.length > 1 @@ -115,18 +74,7 @@ def adjusted_literal end end - def year_string(val = literal) - if val >= 0 - val.to_s.rjust(4, "0") - else - base = val.to_s - .delete_prefix("-") - .rjust(4, "0") - "-#{base}" - end - end - - def earliest_by_partial + def earliest_detail case partial_indicator when nil Date.new(literal, 1, 1) @@ -139,15 +87,7 @@ def earliest_by_partial end end - def earliest_for_before - if Emendate.options.before_date_treatment == :point - latest - else - Emendate.options.open_unknown_start_date - end - end - - def latest_by_partial + def latest_detail case partial_indicator when nil Date.new(literal, 12, -1) @@ -159,8 +99,6 @@ def latest_by_partial Date.new(literal, 12, 31) end end - - def addable_token_types = %i[partial before after era_bce] end end end diff --git a/lib/emendate/date_types/year_month.rb b/lib/emendate/date_types/year_month.rb index 72640065..479233f1 100644 --- a/lib/emendate/date_types/year_month.rb +++ b/lib/emendate/date_types/year_month.rb @@ -30,12 +30,6 @@ def qualifiable? = true # @return [TrueClass] def validatable? = true - # @return [Date] - def earliest = Date.new(year, month, 1) - - # @return [Date] - def latest = Date.new(year, month, -1) - # @return [FalseClass] if no partial indicator or range switch is present, # OR if range_switch is :before and the before_date_treatment setting # is :point @@ -63,6 +57,32 @@ def process_qualifiers begin_and_end_qualifiers.each { |qual| add_qualifier_as_whole(qual) } segment_qualifier_processing(:year, :month) end + + def earliest_detail + case partial_indicator + when nil + Date.new(year, month, 1) + when :early + Date.new(year, month, 1) + when :mid + Date.new(year, month, 11) + when :late + Date.new(year, month, 21) + end + end + + def latest_detail + case partial_indicator + when nil + Date.new(year, month, -1) + when :early + Date.new(year, month, 10) + when :mid + Date.new(year, month, 20) + when :late + Date.new(year, month, -1) + end + end end end end diff --git a/lib/emendate/date_types/year_month_day.rb b/lib/emendate/date_types/year_month_day.rb index 80d48f02..e0156427 100644 --- a/lib/emendate/date_types/year_month_day.rb +++ b/lib/emendate/date_types/year_month_day.rb @@ -36,14 +36,6 @@ def qualifiable? = true # @return [TrueClass] def validatable? = true - def earliest - Date.new(year, month, day) - end - - def latest - earliest - end - def literal = "#{year}"\ "#{month.to_s.rjust(2, "0")}"\ "#{day.to_s.rjust(2, "0")}" @@ -63,6 +55,8 @@ def range? private + def addable_token_types = %i[before after] + def validate check_date_validity has_x_date_parts(3) @@ -85,6 +79,11 @@ def check_date_validity else self end + + def earliest_detail + Date.new(year, month, day) + end + alias_method :latest_detail, :earliest_detail end end end diff --git a/lib/emendate/date_types/year_season.rb b/lib/emendate/date_types/year_season.rb index bfbb4654..6868d7c7 100644 --- a/lib/emendate/date_types/year_season.rb +++ b/lib/emendate/date_types/year_season.rb @@ -64,24 +64,6 @@ def qualifiable? = true # @return [TrueClass] def validatable? = true - def earliest - return get_date(:start) unless include_prev_year - - Date.new(year - 1, 12, 1) - end - - def latest - get_date(:end) - end - - def earliest_at_granularity - "#{earliest.year}-#{earliest.month.to_s.rjust(2, "0")}" - end - - def latest_at_granularity - "#{latest.year}-#{latest.month.to_s.rjust(2, "0")}" - end - def range? !(partial_indicator.nil? && range_switch.nil?) end @@ -90,6 +72,8 @@ def range? attr_reader :year, :season, :seasons, :include_prev_year + def addable_token_types = %i[before after] + def validate has_x_date_parts(2) has_one_part_of_type(:year) @@ -102,6 +86,16 @@ def process_qualifiers segment_qualifier_processing(:year, :season) end + def earliest_detail + return get_date(:start) unless include_prev_year + + Date.new(year - 1, 12, 1) + end + + def latest_detail + get_date(:end) + end + # @param type [:start, :end] def get_date(type) recipe = { diff --git a/spec/emendate/date_types/year_month_day_spec.rb b/spec/emendate/date_types/year_month_day_spec.rb index 466205f5..d4bf67c1 100644 --- a/spec/emendate/date_types/year_month_day_spec.rb +++ b/spec/emendate/date_types/year_month_day_spec.rb @@ -13,26 +13,55 @@ let(:day) { 13 } let(:params) { {year: year, month: month, day: day, sources: tokens} } + context "when invalid date" do + let(:string) { "1844 Jun 31" } + let(:year) { 1844 } + let(:month) { 6 } + let(:day) { 31 } + + it "raises error" do + expect { subject }.to raise_error(Emendate::InvalidDateError) + end + end + context "when valid date" do let(:string) { "87-4-13" } it "creates expected datetype" do expect(subject.type).to eq(:yearmonthday_date_type) expect(subject.earliest).to eq(Date.new(1987, 4, 13)) + expect(subject.latest).to eq(Date.new(1987, 4, 13)) expect(subject.lexeme).to eq(string) expect(subject.literal).to eq(19870413) expect(subject.range?).to be_falsey end end - context "when invalid date" do - let(:string) { "1844 Jun 31" } - let(:year) { 1844 } - let(:month) { 6 } - let(:day) { 31 } + context "with before modifier" do + let(:string) { "Before Apr. 13, 1987" } - it "raises error" do - expect { subject }.to raise_error(Emendate::InvalidDateError) + context "when before date treated as range" do + before do + Emendate.config.options.before_date_treatment = :range + Emendate.config.options.open_unknown_start_date = "1600-02-15" + end + + it "returns as expected" do + expect(subject.earliest).to eq(Date.new(1600, 2, 15)) + expect(subject.latest).to eq(Date.new(1987, 4, 12)) + expect(subject.lexeme).to eq(string) + end + end + + context "when before date treated as point" do + before do + Emendate.config.options.before_date_treatment = :point + end + + it "returns as expected" do + expect(subject.latest).to eq(Date.new(1987, 4, 12)) + expect(subject.earliest).to eq(subject.latest) + end end end diff --git a/spec/emendate/date_types/year_month_spec.rb b/spec/emendate/date_types/year_month_spec.rb index f0dee55c..71d70628 100644 --- a/spec/emendate/date_types/year_month_spec.rb +++ b/spec/emendate/date_types/year_month_spec.rb @@ -22,7 +22,7 @@ end end - context "with `Feb. 2020`" do + context "with Feb. 2020" do let(:string) { "Feb. 2020" } it "returns as expected" do @@ -34,7 +34,55 @@ end end - context "with `[Feb. 2020]`" do + context "with before Feb. 2020" do + let(:string) { "before Feb. 2020" } + + context "when before date treated as range" do + before do + Emendate.config.options.before_date_treatment = :range + Emendate.config.options.open_unknown_start_date = "1600-02-15" + end + + it "returns as expected" do + expect(subject.type).to eq(:yearmonth_date_type) + expect(subject.earliest).to eq(Date.new(1600, 2, 15)) + expect(subject.latest).to eq(Date.new(2020, 1, 31)) + expect(subject.lexeme).to eq(string) + end + end + + context "when before date treated as point" do + before do + Emendate.config.options.before_date_treatment = :point + end + + it "returns as expected" do + expect(subject.latest).to eq(Date.new(2020, 1, 31)) + expect(subject.earliest).to eq(subject.latest) + end + end + end + + context "with after Feb. 2020" do + before { allow(Date).to receive(:today).and_return Date.new(2023, 6, 21) } + let(:string) { "after Feb. 2020" } + + it "returns as expected" do + expect(subject.earliest).to eq(Date.new(2020, 3, 1)) + expect(subject.latest).to eq(Date.new(2023, 6, 21)) + end + end + + context "with mid Feb. 2020" do + let(:string) { "mid Feb. 2020" } + + it "returns as expected" do + expect(subject.earliest).to eq(Date.new(2020, 2, 11)) + expect(subject.latest).to eq(Date.new(2020, 2, 20)) + end + end + + context "with [Feb. 2020]" do let(:string) { "[Feb. 2020]" } it "returns as expected" do @@ -42,7 +90,7 @@ end end - context "with `possibly 2020 February`" do + context "with possibly 2020 February" do let(:string) { "possibly 2020 February" } it "returns as expected" do @@ -51,7 +99,7 @@ end end - context "with `2020, possibly February`" do + context "with 2020, possibly February" do let(:string) { "2020, possibly February" } it "returns as expected" do @@ -59,7 +107,7 @@ end end - context "with `2020, February, possibly`" do + context "with 2020, February, possibly" do let(:string) { "2020, February, possibly" } it "returns as expected" do @@ -68,7 +116,7 @@ end end - context "with `2020-?02`" do + context "with 2020-?02" do let(:string) { "2020-?02" } it "returns as expected" do diff --git a/spec/emendate/date_types/year_spec.rb b/spec/emendate/date_types/year_spec.rb index 20409ac1..e141791a 100644 --- a/spec/emendate/date_types/year_spec.rb +++ b/spec/emendate/date_types/year_spec.rb @@ -151,9 +151,9 @@ expect(yr.literal).to eq(231) expect(yr.range?).to be_falsey expect(yr.earliest).to eq(Date.new(231, 1, 1)) - expect(yr.earliest_at_granularity).to eq("0231") + expect(yr.earliest_at_granularity).to eq("231") expect(yr.latest).to eq(Date.new(231, 12, 31)) - expect(yr.latest_at_granularity).to eq("0231") + expect(yr.latest_at_granularity).to eq("231") end context "with bce set" do @@ -168,9 +168,9 @@ expect(yr.literal).to eq(-230) expect(yr.range?).to be_falsey expect(yr.earliest).to eq(Date.new(-230, 1, 1)) - expect(yr.earliest_at_granularity).to eq("-0230") + expect(yr.earliest_at_granularity).to eq("-230") expect(yr.latest).to eq(Date.new(-230, 12, 31)) - expect(yr.latest_at_granularity).to eq("-0230") + expect(yr.latest_at_granularity).to eq("-230") end end @@ -183,9 +183,9 @@ expect(yr.literal).to eq(-231) expect(yr.range?).to be_falsey expect(yr.earliest).to eq(Date.new(-231, 1, 1)) - expect(yr.earliest_at_granularity).to eq("-0231") + expect(yr.earliest_at_granularity).to eq("-231") expect(yr.latest).to eq(Date.new(-231, 12, 31)) - expect(yr.latest_at_granularity).to eq("-0231") + expect(yr.latest_at_granularity).to eq("-231") end end end From 7c4b4fad0b2e113e247a1f35cb1defe65fa10b6c Mon Sep 17 00:00:00 2001 From: Kristina Spurgin Date: Wed, 24 Jan 2024 19:33:40 -0500 Subject: [PATCH 9/9] Support pattern: before ##/#### --- lib/emendate/date_part_tagger.rb | 6 ++++-- spec/emendate/date_part_tagger_spec.rb | 9 +++++++++ spec/support/examples.csv | 2 +- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/lib/emendate/date_part_tagger.rb b/lib/emendate/date_part_tagger.rb index 31b7faaf..c70a3a65 100644 --- a/lib/emendate/date_part_tagger.rb +++ b/lib/emendate/date_part_tagger.rb @@ -73,8 +73,10 @@ def full_match_tagger end def full_match_date_part_tagger - # case result.date_part_types.sort.join(" ") - # end + case result.date_part_types.join(" ") + when "number1or2 year" + proc { tag_numeric_month } + end end def partial_match_tagger diff --git a/spec/emendate/date_part_tagger_spec.rb b/spec/emendate/date_part_tagger_spec.rb index a2b00654..d95d5e7e 100644 --- a/spec/emendate/date_part_tagger_spec.rb +++ b/spec/emendate/date_part_tagger_spec.rb @@ -37,6 +37,15 @@ end end + context "with before ##/####" do + let(:string) { "before 11/1950" } + + it "segments as expected" do + expect(types).to eq(%i[before month year]) + expect(result.lexeme).to eq(string) + end + end + context "with ####.#" do let(:string) { "2020.0" } diff --git a/spec/support/examples.csv b/spec/support/examples.csv index bbddc7fa..68101352 100644 --- a/spec/support/examples.csv +++ b/spec/support/examples.csv @@ -312,5 +312,5 @@ "Y3388E2S3","Y3388E2S3","y####e#s#","currently_unparseable;edtf;edtf2;exponential_year;letter_prefixed_year;significant_digits",,,"0","nilValue","nilValue","nilValue","nilValue",,,"Unprocessable string",, "1947/ca. 1965","1947/ca. 1965","####/ca. ####","approximate;year_granularity;range","ccp",,"0","1947","1965","1947-01-01","1965-12-31","approximate, end",,"na",, "1815 or later","1815 or later","#### or later","or_after;year_granularity;after_before","ccp","open_unknown_end_date: '2022-06-01'","0","1815","2022","1815-01-01","2022-06-01",,,,"[1815..]","1815 or a later date" -"before 11/1750","before 11/1750","before ##/####","after_before;year_month_granularity","ccp","open_unknown_start_date: '1600-02-15'","0","1600-02","1749-10","1600-02-15","1749-10-31",,,"na","[..1750-11]","1749-11 or an earlier date" +"before 11/1750","before 11/1750","before ##/####","after_before;year_month_granularity","ccp","open_unknown_start_date: '1600-02-15'","0","1600-02","1750-10","1600-02-15","1750-10-31",,,"na","[..1750-11]","1750-11 or an earlier date" "Sept. 28, 1969","Sept. 28, 1969","MON. ##, ####","year_month_day_granularity","ccp",,"0","1969-09-28","1969-09-28","1969-09-28","1969-09-28",,,,"1969-09-28","1969-09-28"