Skip to content

Commit

Permalink
Merge pull request #16 from zoho/hawking_dev
Browse files Browse the repository at this point in the history
Parser Model Enhancement, New bug fixes, EUPL licence removed
  • Loading branch information
ArulVendhan authored May 11, 2021
2 parents 7ad23a7 + 23cb95a commit 931ed76
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 11 deletions.
6 changes: 1 addition & 5 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

<groupId>com.zoho</groupId>
<artifactId>hawking</artifactId>
<version>0.1.1</version>
<version>0.1.2</version>
<packaging>jar</packaging>
<name>Hawking</name>
<description>Hawking is a natural language date time parser that extracts date and time from text using context and parses them into the required format.</description>
Expand Down Expand Up @@ -34,10 +34,6 @@
<name>GPL-v3.0</name>
<url>http://www.gnu.org/licenses/gpl-3.0.txt</url>
</license>
<license>
<name>EUPL-v1.1</name>
<url>http://joinup.ec.europa.eu/system/files/ES/EUPL%20v.1.1%20-%20Licencia.pdf</url>
</license>
</licenses>

<properties>
Expand Down
24 changes: 23 additions & 1 deletion src/main/java/com/zoho/hawking/language/english/Recognizer.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ public static ParsedDate recognize(String input) {
}
parsedDate.setOutputWithoffsets(dateList);
parsedDate.setTaggedWithXML(taggedWithXML.toString().trim());
parsedDate = tagAlternator(input, parsedDate);
parsedDate = tagShrinker(input, parsedDate);
LOGGER.info("Recoginzer Regex Tagged Sequence::::"+ parsedDate.getTaggedWithXML()+":::::");
return parsedDate;
Expand Down Expand Up @@ -81,7 +82,7 @@ public static Map<String, String> tagPredictor(String input, List<Triple<String,

return TagUtils.tagRegulator(input, tagList, tagsEach);
}
private static ParsedDate tagShrinker(String parseText, ParsedDate parserDateCurrent) {
private static ParsedDate tagShrinker(String parseText, ParsedDate parserDateCurrent) {
List<Triple<String, Integer, Integer>> triples = parserDateCurrent.getOutputWithOffsets();
for (int i = 0; i < triples.size(); i++) {
Triple<String, Integer, Integer> triple = triples.get(i);
Expand Down Expand Up @@ -122,6 +123,27 @@ private static ParsedDate tagShrinker(String parseText, ParsedDate parserDateCu
return parserDateCurrent;
}

/**
 * Drops weekday-style tags when the tagged text also carries a month and a number.
 *
 * <p>When the tagged XML contains a {@code day_of_week} or {@code current_day} tag together with
 * both {@code month_of_year} and {@code exact_number}, every {@code day_of_week} /
 * {@code current_day} entry is removed from the offsets list and its
 * {@code <tag>text</tag>} element is stripped from the tagged XML. Otherwise the argument is
 * returned untouched.
 *
 * @param parseText         text that the triple offsets index into
 * @param parserDateCurrent parse result to adjust; it is modified in place
 * @return the same {@code parserDateCurrent} instance
 */
private static ParsedDate tagAlternator(String parseText, ParsedDate parserDateCurrent) {
    String taggedXml = parserDateCurrent.getTaggedWithXML();
    boolean hasDayTag = taggedXml.contains("day_of_week") || taggedXml.contains("current_day"); //No I18N
    if (!hasDayTag || !taggedXml.contains("month_of_year") || !taggedXml.contains("exact_number")) { //No I18N
        return parserDateCurrent;
    }

    List<Triple<String, Integer, Integer>> triples = parserDateCurrent.getOutputWithOffsets();
    boolean removedAny = false;
    // Walk backwards so that remove(i) never shifts an entry that has not been visited yet.
    for (int i = triples.size() - 1; i >= 0; i--) {
        Triple<String, Integer, Integer> triplet = triples.get(i);
        String tag = triplet.first();
        if (!tag.equals("day_of_week") && !tag.equals("current_day")) { //No I18N
            continue;
        }
        String text = parseText.substring(triplet.second(), triplet.third());
        // Strip the element under its own tag name, and accumulate into taggedXml so that
        // a later removal does not undo an earlier one.
        taggedXml = taggedXml.replace("<" + tag + ">" + text + "</" + tag + ">", ""); //No I18N
        triples.remove(i);
        removedAny = true;
    }

    // Only write back when something was actually removed, as the original did.
    if (removedAny) {
        parserDateCurrent.setOutputWithoffsets(triples);
        parserDateCurrent.setTaggedWithXML(taggedXml);
    }
    return parserDateCurrent;
}


}

10 changes: 8 additions & 2 deletions src/main/java/com/zoho/hawking/utils/DateTimeProperties.java
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,10 @@ private String removeTimeZone(String parsedText) {
for (String timezone : TimeZoneExtractor.timeZoneList) {
if (isContain(parsedText, timezone)) {
returnText = parsedText.replaceAll("\\b(?i)" + timezone + "\\b", ""); //No I18N
returnText = returnText.replaceAll(",", " "); //No I18N
returnText = returnText.replaceAll("([,“”\"~()@])", " "); //No I18N
returnText = returnText.replaceAll("(hrs|hr|Hr|Hrs)", " hrs"); //No I18N
returnText = returnText.replaceAll("date", "day"); //No I18N
returnText = returnText.replaceAll("nextweek", "next week"); //No I18N
returnText = returnText.replaceAll("Year", "year"); //No I18N
returnText = returnText.replaceAll("\\b(?i)" + "final", "last"); //No I18N
returnText = returnText.replaceAll("\\.$", " "); //No I18N
Expand All @@ -178,11 +180,14 @@ private String removeTimeZone(String parsedText) {
returnText = returnText.replaceAll("lunch", "1 PM"); //No I18N
returnText = returnText.replaceAll("dinner", "8 PM"); //No I18N
returnText = returnText.replaceAll("null",""); //No I18N
returnText = returnText.replaceAll("\\s{2,}", " ").trim();//No I18N
return returnText;
}
}
returnText = parsedText.replaceAll(",", " "); //No I18N
returnText = parsedText.replaceAll("([,“”\"~()@])", " "); //No I18N
returnText = returnText.replaceAll("(hrs|Hrs|Hr|hr)", " hrs"); //No I18N
returnText = returnText.replaceAll("date", "day"); //No I18N
returnText = returnText.replaceAll("nextweek", "next week"); //No I18N
returnText = returnText.replaceAll("Year", "year"); //No I18N
returnText = returnText.replaceAll("\\b(?i)" + "final", "last"); //No I18N
returnText = returnText.replaceAll("\\.$", " ").trim(); //No I18N
Expand All @@ -193,6 +198,7 @@ private String removeTimeZone(String parsedText) {
returnText = returnText.replaceAll("lunch", "1 PM"); //No I18N
returnText = returnText.replaceAll("dinner", "8 PM"); //No I18N
returnText = returnText.replaceAll("null",""); //No I18N
returnText = returnText.replaceAll("\\s{2,}", " ").trim();//No I18N
returnText = test.length() > 0 ? returnText : test;
return returnText;

Expand Down
6 changes: 3 additions & 3 deletions src/main/java/com/zoho/hawking/utils/RecognizerTagger.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
public class RecognizerTagger {

private final static Pattern implictPrefix = Pattern.compile(
"^(beginning|upcoming|starting|previous|current|between|coming|within|ending|before|until|after|since|start|forty|this|next|last|past|from|till|rest|most|with|the|for|few|end|in|at|on|of|an|a)$");
"^(beginning|upcoming|starting|previous|current|between|coming|within|ending|before|until|after|since|start|forty|this|next|last|past|from|till|rest|most|with|the|for|few|end|in|at|on|of|by|an|a)$");
private final static Pattern implictPostfix = Pattern.compile("^(back|ago)$");

private final static Pattern second_span = Pattern.compile("^(second|seconds|sec|secs)$");
Expand All @@ -29,10 +29,10 @@ public class RecognizerTagger {
private final static Pattern exact_date_2 = Pattern.compile("^(\\d{1,2})[-/]\\d{1,2}$");

private final static Pattern exact_time_1 = Pattern
.compile("^(((0[0-9]|1[0-9]|2[0-3]|[0-9])([:.][0-5][0-9])?([:.][0-5][0-9])?)([AaPp][Mm]))$");
.compile("^(((0[0-9]|1[0-9]|2[0-3]|[0-9])([:.][0-5][0-9])?([:.][0-5][0-9])?)([AaPp][.]?[Mm]))$");
private final static Pattern exact_time_2 = Pattern
.compile("^(((0[0-9]|1[0-9]|2[0-3]|[0-9])([:.][0-5][0-9])([:.][0-5][0-9])?))$");
private final static Pattern exact_time_3 = Pattern.compile("^([AaPp][Mm])$");
private final static Pattern exact_time_3 = Pattern.compile("^([AaPp][.]?[Mm])$");

private final static Pattern exact_year = Pattern.compile("^\\d{4}$");
private final static Pattern exact_number_1 = Pattern.compile(
Expand Down
Binary file modified src/main/resources/parser/parser.crf.ser.gz
100755 → 100644
Binary file not shown.

0 comments on commit 931ed76

Please sign in to comment.