Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/trickl/language
Browse files Browse the repository at this point in the history
  • Loading branch information
trickl committed Oct 1, 2018
2 parents 4211d9e + 00d59cc commit e9b92c8
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 58 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

[![build_status](https://travis-ci.com/trickl/language.svg?branch=master)](https://travis-ci.com/trickl/language)
[![Maintainability](https://api.codeclimate.com/v1/badges/be4af1f4cc620e465849/maintainability)](https://codeclimate.com/github/trickl/language/maintainability)
[![Test Coverage](https://api.codeclimate.com/v1/badges/be4af1f4cc620e465849/test_coverage)](https://codeclimate.com/github/trickl/language/test_coverage)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)

A set of short utility functions for parsing common English language constructs.
Expand Down
86 changes: 54 additions & 32 deletions src/main/java/com/trickl/language/EnglishDurationFormat.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ public final class EnglishDurationFormat {

/** Maps unit alias strings (for example "ns" or "nanoseconds") to their {@link ChronoUnit}. */
private static final Map<String, ChronoUnit> UNIT_ALIASES;

/**
 * The {@link ChronoUnit} values, excluding {@code FOREVER} and {@code ERAS}, sorted in the
 * reverse of their natural order. Iterated by {@code format}.
 */
private static final List<ChronoUnit> SORTED_UNITS;

/**
 * Regular expression matching one leading "amount unit" component of a duration string,
 * followed by any remaining text.
 *
 * <p>Group 1 is the whole component including surrounding whitespace, group 3 is the optional
 * run of digits and group 4 is the alphabetic unit name.
 */
private static final String FORMAT_PATTERN =
"^((\\p{Space}*(\\p{Digit}+))?\\p{Space}*(\\p{Alpha}+)\\p{Space}*).*";

/**
 * Creates a formatter with an accuracy of {@link ChronoUnit#MILLIS} by delegating to the
 * constructor that takes an explicit accuracy.
 */
public EnglishDurationFormat() {
this(ChronoUnit.MILLIS);
}
Expand All @@ -45,6 +50,11 @@ public EnglishDurationFormat(ChronoUnit accuracy) {
m.put("ns", ChronoUnit.NANOS);
m.put("nanoseconds", ChronoUnit.NANOS);
UNIT_ALIASES = Collections.unmodifiableMap(m);

SORTED_UNITS = new ArrayList<>(EnumSet.allOf(ChronoUnit.class));
SORTED_UNITS.remove(ChronoUnit.FOREVER);
SORTED_UNITS.remove(ChronoUnit.ERAS);
Collections.sort(SORTED_UNITS, Comparator.reverseOrder());
}

/**
Expand All @@ -54,17 +64,9 @@ public EnglishDurationFormat(ChronoUnit accuracy) {
* @param duration The duration to convert
* @return English representation
*/
public String format(Duration duration) {

// Order all the temporal units
List<ChronoUnit> sortedUnits = new ArrayList<>(EnumSet.allOf(ChronoUnit.class));
sortedUnits.remove(ChronoUnit.FOREVER);
sortedUnits.remove(ChronoUnit.ERAS);

Collections.sort(sortedUnits, Comparator.reverseOrder());

public String format(Duration duration) {
List<String> formattedParts = new LinkedList<>();
for (ChronoUnit unit : sortedUnits) {
for (ChronoUnit unit : SORTED_UNITS) {
if (unit.compareTo(accuracy) == -1) {
break;
}
Expand All @@ -81,9 +83,7 @@ public String format(Duration duration) {
String unitName = unit.toString().toLowerCase();
if (unitSize == 1) {
// Singularize unit
unitName = unitName.replaceAll("ia$", "ium");
unitName = unitName.replaceAll("ries$", "ry");
unitName = unitName.replaceAll("s$", "");
unitName = singularize(unitName);
}
String formattedPart = unitSize + " " + unitName;
formattedParts.add(formattedPart);
Expand All @@ -107,33 +107,18 @@ public Duration parse(String text) throws ParseException {

while (!value.isEmpty()) {
Pattern amountAndUnitPattern =
Pattern.compile("^((\\p{Space}*(\\p{Digit}+))?\\p{Space}*(\\p{Alpha}+)\\p{Space}*).*");
Pattern.compile(FORMAT_PATTERN);
Matcher matcher = amountAndUnitPattern.matcher(value);
if (matcher.matches()) {
value = value.substring(matcher.group(1).length());
String unitName = matcher.group(4);
unitName = unitName.toLowerCase();

// Pluralize if necessary
if (unitName.endsWith("ium")) {
unitName = unitName.replaceAll("ium$", "ia");
} else if (unitName.endsWith("ry")) {
unitName = unitName.replaceAll("ry$", "ries");
} else if (!unitName.endsWith("s")) {
unitName = unitName + "s";
}

ChronoUnit unit;
if (UNIT_ALIASES.containsKey(unitName)) {
unit = UNIT_ALIASES.get(unitName);
} else {
try {
unit = Enum.valueOf(ChronoUnit.class, unitName.toUpperCase());
} catch (IllegalArgumentException ex) {
throw new ParseException(ex.getMessage(), 0);
}
}
unitName = pluralize(unitName);

ChronoUnit unit = parseChronoUnit(unitName);

if (duration == null) {
duration = Duration.ZERO;
}
Expand All @@ -151,4 +136,41 @@ public Duration parse(String text) throws ParseException {

return duration;
}

/**
 * Returns the plural form of a unit noun.
 *
 * <p>A trailing "ium" becomes "ia", a trailing "ry" becomes "ries", and any other noun that
 * does not already end in "s" has an "s" appended.
 *
 * @param noun the noun to pluralize
 * @return the plural form, or {@code noun} itself when it already ends in "s"
 */
private String pluralize(String noun) {
  final int length = noun.length();
  if (noun.endsWith("ium")) {
    // Swap the three-character Latin singular ending for its plural.
    return noun.substring(0, length - 3) + "ia";
  }
  if (noun.endsWith("ry")) {
    return noun.substring(0, length - 2) + "ries";
  }
  return noun.endsWith("s") ? noun : noun + "s";
}

/**
 * Returns the singular form of a unit noun.
 *
 * <p>A trailing "ia" becomes "ium", a trailing "ries" becomes "ry", and any other trailing
 * "s" is removed.
 *
 * @param noun the noun to singularize
 * @return the singular form, or {@code noun} itself when none of the endings apply
 */
private String singularize(String noun) {
  final int end = noun.length();
  if (noun.endsWith("ia")) {
    return noun.substring(0, end - 2) + "ium";
  }
  if (noun.endsWith("ries")) {
    return noun.substring(0, end - 4) + "ry";
  }
  if (noun.endsWith("s")) {
    // Plain plural: drop the final "s".
    return noun.substring(0, end - 1);
  }
  return noun;
}

/**
 * Resolves a unit name to a {@link ChronoUnit}.
 *
 * <p>The name is first looked up in {@code UNIT_ALIASES}. If no alias matches, it is
 * upper-cased and resolved as the name of a {@code ChronoUnit} constant.
 *
 * @param text the unit name to resolve
 * @return the matching unit
 * @throws ParseException if {@code text} is neither a known alias nor the name of a
 *     {@code ChronoUnit} constant; the underlying {@link IllegalArgumentException} is attached
 *     as the cause
 */
private ChronoUnit parseChronoUnit(String text) throws ParseException {
  ChronoUnit alias = UNIT_ALIASES.get(text);
  if (alias != null) {
    return alias;
  }

  try {
    // Upper-case with Locale.ROOT so the result does not depend on the default locale
    // (a Turkish default locale would otherwise turn "i" into a dotted capital "İ" and the
    // constant lookup would fail).
    return ChronoUnit.valueOf(text.toUpperCase(java.util.Locale.ROOT));
  } catch (IllegalArgumentException ex) {
    // ParseException has no cause-taking constructor, so attach the cause explicitly.
    ParseException failure = new ParseException(ex.getMessage(), 0);
    failure.initCause(ex);
    throw failure;
  }
}
}
42 changes: 16 additions & 26 deletions src/main/java/com/trickl/language/EnglishNumberParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -104,14 +104,20 @@ private enum NumberLiteral {
keywords -> numberEquals(keywords, NumberLiteral.BILLION);
private static final Function<Terminals, Parser<BigDecimal>> TRILLION =
keywords -> numberEquals(keywords, NumberLiteral.TRILLION);

/**
 * Builds a parser factory that sequences a factor parser with a unit parser and multiplies
 * their results.
 *
 * <p>The factor parser is made optional with a default of {@link BigDecimal#ONE}.
 *
 * @param factor produces the parser for the leading multiplier
 * @param unit produces the parser for the unit value
 * @return a function that, given the terminals, yields the combined parser
 */
private static final Function<Terminals, Parser<BigDecimal>> multiplyUnit(
    Function<Terminals, Parser<BigDecimal>> factor,
    Function<Terminals, Parser<BigDecimal>> unit) {
  return keywords -> {
    Parser<BigDecimal> multiplier = factor.apply(keywords).optional(BigDecimal.ONE);
    Parser<BigDecimal> magnitude = unit.apply(keywords);
    return Parsers.sequence(
        multiplier, magnitude, (count, scale) -> count.multiply(scale));
  };
}

private static final Function<Terminals, Parser<BigDecimal>> HUNDREDS =
keywords ->
Parsers.sequence(
ONE_TO_9.apply(keywords).optional(BigDecimal.ONE),
HUNDRED.apply(keywords),
(a, b) -> a.multiply(b));

multiplyUnit(ONE_TO_9, HUNDRED);

private static final Function<Terminals, Parser<BigDecimal>> TWENTY_TO_99 =
keywords ->
Parsers.sequence(
Expand All @@ -132,32 +138,16 @@ private enum NumberLiteral {
(a, b) -> a.add(b));

private static final Function<Terminals, Parser<BigDecimal>> THOUSANDS =
keywords ->
Parsers.sequence(
ONE_TO_999.apply(keywords).optional(BigDecimal.ONE),
THOUSAND.apply(keywords),
(a, b) -> a.multiply(b));
multiplyUnit(ONE_TO_999, THOUSAND);

private static final Function<Terminals, Parser<BigDecimal>> MILLIONS =
keywords ->
Parsers.sequence(
ONE_TO_999.apply(keywords).optional(BigDecimal.ONE),
MILLION.apply(keywords),
(a, b) -> a.multiply(b));
multiplyUnit(ONE_TO_999, MILLION);

private static final Function<Terminals, Parser<BigDecimal>> BILLIONS =
keywords ->
Parsers.sequence(
ONE_TO_999.apply(keywords).optional(BigDecimal.ONE),
BILLION.apply(keywords),
(a, b) -> a.multiply(b));
multiplyUnit(ONE_TO_999, BILLION);

private static final Function<Terminals, Parser<BigDecimal>> TRILLIONS =
keywords ->
Parsers.sequence(
ONE_TO_999.apply(keywords).optional(BigDecimal.ONE),
TRILLION.apply(keywords),
(a, b) -> a.multiply(b));
multiplyUnit(ONE_TO_999, TRILLION);

private static final Function<Terminals, Parser<BigDecimal>> ALL_POS_NUMBERS =
keywords ->
Expand Down

0 comments on commit e9b92c8

Please sign in to comment.