Skip to content

Commit

Permalink
[EN DateTimeV2] Fixed "one morning" incorrectly extracted as 1:00 AM (
Browse files Browse the repository at this point in the history
…#2681) (#2746)

* Fixed "one morning" incorrectly extracted as 1:00 AM (#2681)

* Added Duration AmbiguityFilter as per review

Co-authored-by: LionbridgeCS2 <[email protected]>
  • Loading branch information
aitelint and LionbridgeCS2 authored Nov 4, 2021
1 parent 70270a0 commit 52c16ac
Show file tree
Hide file tree
Showing 17 changed files with 419 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -755,6 +755,14 @@ public static class DateTimeDefinitions
{ @"^(apr|aug|dec|feb|jan|jul|jun|mar|may|nov|oct|sept?)$", @"([$%£&!?@#])(apr|aug|dec|feb|jan|jul|jun|mar|may|nov|oct|sept?)|(apr|aug|dec|feb|jan|jul|jun|mar|may|nov|oct|sept?)([$%£&@#])" },
{ @"^(to\s+date)$", @"\b((equals?|up)\s+to\s+date)\b" }
};
// Ambiguity filters for English time extraction; loaded into Regex pairs by
// EnglishTimeExtractorConfiguration.AmbiguityFiltersDict.
// Key:   a lone word or 1-2 digit number, optionally followed by a time-of-day word.
// Value: a number word (one..twelve) or 1-2 digit number followed by a time-of-day word,
//        e.g. "one morning", which should not be read as a clock time.
// NOTE(review): presumably applied like the duration filter (key tested against the extracted
// text, value against the whole input, overlapping extractions dropped) - confirm in the time extractor.
public static readonly Dictionary<string, string> AmbiguityTimeFiltersDict = new Dictionary<string, string>
{
{ @"^(\p{L}+|\d{1,2})(\s+(morning|afternoon|evening|night))?$", @"\b(one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|\d{1,2})\s+(morning|afternoon|evening|night)\b" }
};
// Ambiguity filters for English duration extraction; loaded into Regex pairs by
// EnglishDurationExtractorConfiguration.AmbiguityFiltersDict.
// Key:   tested against the text of each extracted duration (here: anything ending in "night").
// Value: tested against the whole input; extractions overlapping a match are dropped, so
//        compounds such as "night club", "night-shift" or "night workers" are not reported as durations.
public static readonly Dictionary<string, string> AmbiguityDurationFiltersDict = new Dictionary<string, string>
{
{ @"night$", @"\bnight(\s*|-)(club|light|market|shift|work(er)?)s?\b" }
};
public static readonly IList<string> MorningTermList = new List<string>
{
@"morning"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System.Collections.Generic;
using System.Collections.Immutable;
using System.Text.RegularExpressions;

Expand Down Expand Up @@ -127,5 +128,7 @@ public ArabicDurationExtractorConfiguration(IDateTimeOptionsConfiguration config
Regex IDurationExtractorConfiguration.ModPrefixRegex => ModPrefixRegex;

Regex IDurationExtractorConfiguration.ModSuffixRegex => ModSuffixRegex;

// No duration ambiguity filters are defined for Arabic; the duration extractor's
// FilterAmbiguity step leaves results untouched when this is null.
public Dictionary<Regex, Regex> AmbiguityFiltersDict => null;
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System.Collections.Generic;
using System.Collections.Immutable;
using System.Text.RegularExpressions;

Expand Down Expand Up @@ -126,5 +127,7 @@ public DutchDurationExtractorConfiguration(IDateTimeOptionsConfiguration config)
Regex IDurationExtractorConfiguration.ModPrefixRegex => ModPrefixRegex;

Regex IDurationExtractorConfiguration.ModSuffixRegex => ModSuffixRegex;

// No duration ambiguity filters are defined for Dutch; the duration extractor's
// FilterAmbiguity step leaves results untouched when this is null.
public Dictionary<Regex, Regex> AmbiguityFiltersDict => null;
}
}
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System.Collections.Generic;
using System.Collections.Immutable;
using System.Text.RegularExpressions;

using Microsoft.Recognizers.Definitions.English;
using Microsoft.Recognizers.Definitions.Utilities;
using Microsoft.Recognizers.Text.Number;

namespace Microsoft.Recognizers.Text.DateTime.English
Expand Down Expand Up @@ -127,5 +129,7 @@ public EnglishDurationExtractorConfiguration(IDateTimeOptionsConfiguration confi
Regex IDurationExtractorConfiguration.ModPrefixRegex => ModPrefixRegex;

Regex IDurationExtractorConfiguration.ModSuffixRegex => ModSuffixRegex;

// Ambiguity filters for English durations, built from DateTimeDefinitions.AmbiguityDurationFiltersDict.
// Initialised once per configuration instance: an expression-bodied getter would call
// LoadAmbiguityFilters again (presumably rebuilding every Regex) on each access, and the
// extractor's FilterAmbiguity step reads this property on every Extract call.
public Dictionary<Regex, Regex> AmbiguityFiltersDict { get; } = DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityDurationFiltersDict);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System.Text.RegularExpressions;

using Microsoft.Recognizers.Definitions.English;
using Microsoft.Recognizers.Definitions.Utilities;

namespace Microsoft.Recognizers.Text.DateTime.English
{
Expand Down Expand Up @@ -146,6 +147,6 @@ public EnglishTimeExtractorConfiguration(IDateTimeOptionsConfiguration config)

public string TimeTokenPrefix => DateTimeDefinitions.TimeTokenPrefix;

public Dictionary<Regex, Regex> AmbiguityFiltersDict => null;
// Ambiguity filters for English times, built from DateTimeDefinitions.AmbiguityTimeFiltersDict.
// Initialised once per configuration instance: an expression-bodied getter would call
// LoadAmbiguityFilters again (presumably rebuilding every Regex) on each access.
public Dictionary<Regex, Regex> AmbiguityFiltersDict { get; } = DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityTimeFiltersDict);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using Microsoft.Recognizers.Text.Utilities;
using DateObject = System.DateTime;

Expand Down Expand Up @@ -38,6 +39,9 @@ public List<ExtractResult> Extract(string text, DateObject reference)

var rets = Token.MergeAllTokens(tokens, text, ExtractorName);

// Remove common ambiguous cases
rets = FilterAmbiguity(rets, text);

// First MergeMultipleDuration then ResolveMoreThanOrLessThanPrefix so cases like "more than 4 days and less than 1 week" will not be merged into one "multipleDuration"
if (this.merge)
{
Expand Down Expand Up @@ -355,5 +359,26 @@ private List<ExtractResult> MergeMultipleDuration(string text, List<ExtractResul

return results;
}

// Removes extractions that fall inside a known ambiguous context of the input.
//
// Each configured filter is a (key, value) Regex pair. When the key matches the text of at
// least one current extraction, the value is matched against the whole input and every
// extraction whose span overlaps one of those matches is dropped.
//
// extractResults: candidate extractions for "text".
// text:           the full input the extractions were taken from.
// Returns the filtered list; the input list is returned unchanged when the configuration
// has no filters (null) or no filter key matches.
private List<ExtractResult> FilterAmbiguity(List<ExtractResult> extractResults, string text)
{
    // Read the property once: a configuration may build the dictionary inside the getter.
    var filters = this.config.AmbiguityFiltersDict;
    if (filters == null)
    {
        return extractResults;
    }

    foreach (var filter in filters)
    {
        // The filter only applies when some extraction looks like the ambiguous form.
        if (!extractResults.Any(er => filter.Key.IsMatch(er.Text)))
        {
            continue;
        }

        // Materialise the context matches once instead of re-enumerating them per extraction.
        var contextMatches = filter.Value.Matches(text).Cast<Match>().ToList();

        // Keep only extractions that do not overlap any ambiguous context span.
        extractResults = extractResults
            .Where(er => !contextMatches.Any(m => m.Index < er.Start + er.Length && m.Index + m.Length > er.Start))
            .ToList();
    }

    return extractResults;
}
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System.Collections.Generic;
using System.Collections.Immutable;
using System.Text.RegularExpressions;

Expand Down Expand Up @@ -52,5 +53,7 @@ public interface IDurationExtractorConfiguration : IDateTimeOptionsConfiguration

IImmutableDictionary<string, long> UnitValueMap { get; }

// Optional (key, value) Regex pairs used by the duration extractor's FilterAmbiguity step:
// the key is tested against each extraction's text, the value against the full input.
// Implementations return null when the language defines no filters.
Dictionary<Regex, Regex> AmbiguityFiltersDict { get; }

}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System.Collections.Generic;
using System.Collections.Immutable;
using System.Text.RegularExpressions;

Expand Down Expand Up @@ -127,5 +128,7 @@ public FrenchDurationExtractorConfiguration(IDateTimeOptionsConfiguration config
Regex IDurationExtractorConfiguration.ModPrefixRegex => ModPrefixRegex;

Regex IDurationExtractorConfiguration.ModSuffixRegex => ModSuffixRegex;

// No duration ambiguity filters are defined for French; the duration extractor's
// FilterAmbiguity step leaves results untouched when this is null.
public Dictionary<Regex, Regex> AmbiguityFiltersDict => null;
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System.Collections.Generic;
using System.Collections.Immutable;
using System.Text.RegularExpressions;
using Microsoft.Recognizers.Definitions.German;
Expand Down Expand Up @@ -127,5 +128,7 @@ public GermanDurationExtractorConfiguration(IDateTimeOptionsConfiguration config
Regex IDurationExtractorConfiguration.ModPrefixRegex => ModPrefixRegex;

Regex IDurationExtractorConfiguration.ModSuffixRegex => ModSuffixRegex;

// No duration ambiguity filters are defined for German; the duration extractor's
// FilterAmbiguity step leaves results untouched when this is null.
public Dictionary<Regex, Regex> AmbiguityFiltersDict => null;
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System.Collections.Generic;
using System.Collections.Immutable;
using System.Text.RegularExpressions;

Expand Down Expand Up @@ -116,5 +117,7 @@ public HindiDurationExtractorConfiguration(IDateTimeOptionsConfiguration config)
Regex IDurationExtractorConfiguration.ModPrefixRegex => ModPrefixRegex;

Regex IDurationExtractorConfiguration.ModSuffixRegex => ModSuffixRegex;

// No duration ambiguity filters are defined for Hindi; the duration extractor's
// FilterAmbiguity step leaves results untouched when this is null.
public Dictionary<Regex, Regex> AmbiguityFiltersDict => null;
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System.Collections.Generic;
using System.Collections.Immutable;
using System.Text.RegularExpressions;

Expand Down Expand Up @@ -116,5 +117,7 @@ public ItalianDurationExtractorConfiguration(IDateTimeOptionsConfiguration confi
Regex IDurationExtractorConfiguration.ModPrefixRegex => ModPrefixRegex;

Regex IDurationExtractorConfiguration.ModSuffixRegex => ModSuffixRegex;

// No duration ambiguity filters are defined for Italian; the duration extractor's
// FilterAmbiguity step leaves results untouched when this is null.
public Dictionary<Regex, Regex> AmbiguityFiltersDict => null;
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System.Collections.Generic;
using System.Collections.Immutable;
using System.Text.RegularExpressions;
using Microsoft.Recognizers.Definitions.Portuguese;
Expand Down Expand Up @@ -130,5 +131,7 @@ public PortugueseDurationExtractorConfiguration(IDateTimeOptionsConfiguration co
Regex IDurationExtractorConfiguration.ModPrefixRegex => ModPrefixRegex;

Regex IDurationExtractorConfiguration.ModSuffixRegex => ModSuffixRegex;

// No duration ambiguity filters are defined for Portuguese; the duration extractor's
// FilterAmbiguity step leaves results untouched when this is null.
public Dictionary<Regex, Regex> AmbiguityFiltersDict => null;
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System.Collections.Generic;
using System.Collections.Immutable;
using System.Text.RegularExpressions;
using Microsoft.Recognizers.Definitions.Spanish;
Expand Down Expand Up @@ -130,5 +131,7 @@ public SpanishDurationExtractorConfiguration(IDateTimeOptionsConfiguration confi
Regex IDurationExtractorConfiguration.ModPrefixRegex => ModPrefixRegex;

Regex IDurationExtractorConfiguration.ModSuffixRegex => ModSuffixRegex;

// No duration ambiguity filters are defined for Spanish; the duration extractor's
// FilterAmbiguity step leaves results untouched when this is null.
public Dictionary<Regex, Regex> AmbiguityFiltersDict => null;
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using System.Collections.Generic;
using System.Collections.Immutable;
using System.Text.RegularExpressions;

Expand Down Expand Up @@ -116,5 +117,7 @@ public TurkishDurationExtractorConfiguration(IDateTimeOptionsConfiguration confi
Regex IDurationExtractorConfiguration.ModPrefixRegex => ModPrefixRegex;

Regex IDurationExtractorConfiguration.ModSuffixRegex => ModSuffixRegex;

// No duration ambiguity filters are defined for Turkish; the duration extractor's
// FilterAmbiguity step leaves results untouched when this is null.
public Dictionary<Regex, Regex> AmbiguityFiltersDict => null;
}
}
8 changes: 8 additions & 0 deletions Patterns/English/English-DateTime.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1181,6 +1181,14 @@ AmbiguityFiltersDict: !dictionary
'^\d+m$': '^\d+m$'
'^(apr|aug|dec|feb|jan|jul|jun|mar|may|nov|oct|sept?)$': '([$%£&!?@#])(apr|aug|dec|feb|jan|jul|jun|mar|may|nov|oct|sept?)|(apr|aug|dec|feb|jan|jul|jun|mar|may|nov|oct|sept?)([$%£&@#])'
'^(to\s+date)$': '\b((equals?|up)\s+to\s+date)\b'
# Time ambiguity filters (key pattern -> context pattern).
# Key: a lone word or 1-2 digit number, optionally followed by a time-of-day word.
# Value: a number (word or digits) followed by a time-of-day word, e.g. "one morning",
# which should not be extracted as a clock time.
AmbiguityTimeFiltersDict: !dictionary
types: [ string, string ]
entries:
'^(\p{L}+|\d{1,2})(\s+(morning|afternoon|evening|night))?$': '\b(one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|\d{1,2})\s+(morning|afternoon|evening|night)\b'
# Duration ambiguity filters (key pattern -> context pattern).
# Key: tested against each extracted duration's text (anything ending in "night").
# Value: tested against the whole input; overlapping extractions are dropped so that
# compounds like "night club" or "night-shift" are not reported as durations.
AmbiguityDurationFiltersDict: !dictionary
types: [ string, string ]
entries:
'night$': '\bnight(\s*|-)(club|light|market|shift|work(er)?)s?\b'
# For TimeOfDay resolution
MorningTermList: !list
types: [ string ]
Expand Down
Loading

0 comments on commit 52c16ac

Please sign in to comment.