-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing
3 changed files
with
897 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
267 changes: 267 additions & 0 deletions
267
src/main/python/opensextant/resources/datetime_patterns_py.cfg
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,267 @@ | ||
/* | ||
* NOTICE | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
* | ||
* ************************************************************************** | ||
* NOTICE | ||
* This software was produced for the U. S. Government under Contract No. | ||
* W15P7T-12-C-F600, and is subject to the Rights in Noncommercial Computer | ||
* Software and Noncommercial Computer Software Documentation Clause | ||
* 252.227-7014 (JUN 1995) | ||
* | ||
* (c) 2009-2013 The MITRE Corporation. All Rights Reserved. | ||
* ************************************************************************** | ||
*/ | ||
|
||
// ALL Patterns below - defines, rules, etc. -- are for MATCHING. | ||
// Parsing of actual fields named in defines is done after matches are found. | ||
// Validation of parsed fields is last. | ||
|
||
# Well-known month abbreviations. | ||
#DEFINE MON_ABBREV JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEPT?|OCT|NOV|DEC | ||
|
||
# A name starting with 3 ASCII letters as above, but followed by other letters, possibly not English or ASCII. | ||
# Detection of month/day/year patterns with non-English month names is only a coincidence if they share a common prefix. | ||
# Locales for date patterns and language options could be explored further. But is beyond scope. | ||
#DEFINE MON_NAME JAN\w{0,6}|FEB\w{0,6}|MAR\w{0,2}|APR\w{0,3}|MAY|JUN\w{0,2}|JUL\w{0,2}|AUG\w{0,3}|SEP\w{0,6}|OCT\w{0,6}|NOV\w{0,6}|DEC\w{0,6} | ||
#DEFINE DAY_ENUM th|nd|rd|st | ||
|
||
# Fixed length fields | ||
// In all practicality, year is 1xxx or 2xxx. Years 0001 to 0999 not really considered. | ||
#DEFINE YEAR [12]\d{3} | ||
#DEFINE YY '?\d\d | ||
|
||
// Year/YY is 2-4 digits,... but could be 3. This is only used for matching. XTemp still validates matches. | ||
// '76 | ||
// | ||
#DEFINE YEARYY '?\d{2}|\d{4} | ||
#DEFINE MM [01]\d | ||
#DEFINE DD [0-3]\d | ||
#DEFINE SHORT_TZ [A-Z] | ||
|
||
#DEFINE hh [0-2]\d | ||
#DEFINE mm [0-5]\d | ||
#DEFINE ss [0-5]\d | ||
|
||
|
||
# Variable length | ||
#DEFINE DOM [0-3]?\d | ||
#DEFINE MONTH [01]?\d | ||
#DEFINE LONG_TZ [A-Z]{3,5} | ||
# Variable length Day/Month digits: | ||
#DEFINE DM1 [0-3]?\d | ||
#DEFINE DM2 [0-3]?\d | ||
|
||
#DEFINE OF of|Of|OF | ||
# Do not use DSEP? (that is an optional separator.) Most field-separated patterns would be too noisy. | ||
#DEFINE DSEP1 [-/.] | ||
#DEFINE DSEP2 [-/.] | ||
|
||
// ........................................ | ||
// Month, Day, Year patterns, MDY | ||
// ........................................ | ||
// FORM: DATE: MM/DD/YY | ||
#CLASS MDY opensextant.extractors.xtemporal.DateTimeMatch | ||
#RULE MDY 01 \b<DM1>/<DM2>/<YY>\b | ||
#TEST MDY 01 12/30/90 | ||
#TEST MDY 01 DATE: 12/30/90 | ||
#TEST MDY 01 DATE: 30/07/90 Jul 30, European locale | ||
#TEST MDY 01 30/12/15 Dec 30, European locale | ||
#TEST MDY 01 30/14/15 FAIL 14 is invalid | ||
#TEST MDY 01 13/30/90 FAIL bad MON | ||
#TEST MDY 01 12/32/90 FAIL bad DOM | ||
#TEST MDY 01 12/30/01 | ||
#TEST MDY 01 12/30/00 | ||
#TEST MDY 01 12/30/55 | ||
#TEST MDY 01 1/30/55 | ||
#TEST MDY 01 12/30/15 | ||
#TEST MDY 01 12/1/15 | ||
#TEST MDY 01 12/01/15 | ||
#TEST MDY 01 15/01/15 Jan 15, 2015 | ||
|
||
|
||
// FORM: DATE: MM/DD/YYYY | ||
#RULE MDY 02 \b<DM1>/<DM2>/<YEAR>\b | ||
#TEST MDY 02 12/30/1990 | ||
#TEST MDY 02 DATE: 12/30/1990 | ||
#TEST MDY 02 13/30/1990 FAIL bad MON | ||
#TEST MDY 02 12/32/1990 FAIL bad DOM | ||
#TEST MDY 02 12/30/2001 | ||
#TEST MDY 02 12/30/0000 FAIL bad YYYY | ||
#TEST MDY 02 12/30/1955 | ||
#TEST MDY 02 12/30/1915 | ||
|
||
// FORM: MMM DD, YYYY or MMM DD YYYY, MMM DD, YY, etc. | ||
#RULE MDY 03 \b<MON_NAME>[\s.]+<DOM>[\s,.]+<YEARYY>\b | ||
#TEST MDY 03 DEC 30, 1990 | ||
#TEST MDY 03 DEC 30 1990 | ||
#TEST MDY 03 DEC.30 1990 | ||
#TEST MDY 03 DEC. 30 1990 | ||
#TEST MDY 03 DEC.30.1990 | ||
#TEST MDY 03 DEC 30 90 | ||
#TEST MDY 03 DEC 30 990 FAIL bad year | ||
#TEST MDY 03 DEC 00 1990 FAIL bad DOM | ||
#TEST MDY 03 DEC 01 2300 FAIL ambiguous; time appears to be 2300 where year is expected | ||
#TEST MDY 03 DECEMBER 30 1990 | ||
#TEST MDY 03 DECMEBER 30 90 | ||
#TEST MDY 03 DECIEMBRE 30 1990 | ||
#TEST MDY 03 DECIEMBRE 00 1990 # FAIL no 00 day | ||
#TEST MDY 03 DECEMBER 01 2300 FAIL ambiguous; time appears to be 2300 where year is expected | ||
|
||
|
||
// FORM: MMM, YYYY or Month, YYYY comma optional. 4-digit year required | ||
#RULE MDY 04 \b<MON_NAME>[\s,.]+<YEAR>\b | ||
#TEST MDY 04 DEC 1990 | ||
#TEST MDY 04 DEC, 1990 | ||
#TEST MDY 04 DEC. 1990 | ||
#TEST MDY 04 DECEMBER, 1990 | ||
#TEST MDY 04 DECIEMBRE, 1990 | ||
#TEST MDY 04 DÉCEMBRE, 1990 | ||
#TEST MDY 04 DÉC, 1990 | ||
|
||
// FORM: MMM of YYYY | ||
#RULE MDY 04a \b<MON_NAME>\s+<OF>\s+<YEAR>\b | ||
#TEST MDY 04a DEC of 1990 | ||
#TEST MDY 04a DECEMBER of 1990 | ||
|
||
#RULE MDY 05 \b<DOM>\s+<MON_NAME>[\s,]+<YEARYY>\b | ||
#TEST MDY 05 30 DEC 1990 | ||
#TEST MDY 05 30 DEC 90 | ||
#TEST MDY 05 01 DEC 00 | ||
#TEST MDY 05 01 DEC 02 | ||
#TEST MDY 05 30 DECEMBER 1990 | ||
#TEST MDY 05 30 DECMEBER 1990 | ||
#TEST MDY 05 30 DECIEMBRE 1990 | ||
|
||
#RULE MDY 06a \b<MON_NAME>[\s.]+<DOM><DAY_ENUM>[\s,]+<YEAR>\b | ||
#TEST MDY 06a September 19th, 2017 | ||
#TEST MDY 06a September 19th, 17 # FAIL | ||
#TEST MDY 06a September 19 th, 17 # FAIL | ||
#TEST MDY 06a Sept. 19th, 2017 | ||
#TEST MDY 06a Sept 19th, 2017 | ||
#TEST MDY 06a Sept 1st, 2017 | ||
#TEST MDY 06a Sept 23rd, 2017 | ||
#TEST MDY 06a Sept 15th, 2017 | ||
#TEST MDY 06a Sept 22nd, 2017 | ||
|
||
#RULE MDY 06b \b<DOM><DAY_ENUM>\s+<OF>?\s*<MON_NAME>[\s,]+<YEAR>\b | ||
#TEST MDY 06b 19th September, 2017 | ||
#TEST MDY 06b 19th of September, 2017 | ||
|
||
#CLASS DMYT opensextant.extractors.xtemporal.DateTimeMatch | ||
#RULE DMYT 01 \b<DOM>\s+<MON_NAME>[\s,]+<YEARYY> <hh>:?<mm>\b | ||
#TEST DMYT 01 30 DEC 1990 0400 | ||
#TEST DMYT 01 30 DEC 90 0400 | ||
#TEST DMYT 01 11 JUN 14 1815 06:15 PM, 11 JUNE 2014 | ||
#TEST DMYT 01 25 March 2012 04:00 | ||
#TEST DMYT 01 25 March, 2012 04:00 | ||
|
||
|
||
|
||
// FORM: DATE: DD-MON-YYYY | ||
#CLASS DMY opensextant.extractors.xtemporal.DateTimeMatch | ||
#RULE DMY 01 \b<DOM>-<MON_NAME>-<YEARYY>\b | ||
#TEST DMY 01 12-DEC-90 | ||
#TEST DMY 01 12-DEC-1990 | ||
|
||
// FORM: DATE: DD MON YYYY | ||
#RULE DMY 02 \b<DOM>\s*<MON_NAME>\s*<YEARYY>\b | ||
#TEST DMY 02 12 DEC 90 | ||
#TEST DMY 02 12 DEC 1990 | ||
#TEST DMY 02 12DEC90 | ||
#TEST DMY 02 12DEC1990 | ||
#TEST DMY 02 12MARCH1999 | ||
#TEST DMY 02 12FEBBRAIO1999 | ||
#TEST DMY 02 12JUL1999 | ||
#TEST DMY 02 12JULIO1999 | ||
|
||
// FORM: DATE: YYYY-MM-DD as it appears in free text of documents. | ||
// The limitations of this pattern are related to how it was used. | ||
// This is a relatively modern format; was this format used in text in 1700s? | ||
#CLASS YMD opensextant.extractors.xtemporal.DateTimeMatch | ||
#RULE YMD 01 \b<YEAR><DSEP1><MM><DSEP2><DOM>\b | ||
#TEST YMD 01 2001-11-11 | ||
#TEST YMD 01 0001-04-34 # FAIL | ||
#TEST YMD 01 1001-04-30 # FAIL | ||
#TEST YMD 01 2001-04-30 | ||
#TEST YMD 01 1990-04-30 | ||
#TEST YMD 01 1790-04-30 # FAIL -- 1800 01 01 is earliest date for this pattern. | ||
#TEST YMD 01 a2001-04-30 # FAIL | ||
#TEST YMD 01 c2001-04-30 # FAIL | ||
#TEST YMD 01 42001-04-30 # FAIL | ||
|
||
|
||
// ........................................ | ||
// DATE TIME PATTERNS, DTM | ||
// ........................................ | ||
#CLASS DTM opensextant.extractors.xtemporal.DateTimeMatch | ||
|
||
// FORM: A|O|P|R DDHHMMZ MMM YY | ||
#RULE DTM 01 \b<DD><hh><mm><SHORT_TZ>\s*<MON_ABBREV>\s*<YY>\b | ||
#TEST DTM 01 A 301400Z DEC 90 | ||
#TEST DTM 01 R 301400Z DEC 90 | ||
#TEST DTM 01 A 351400Z DEC 90 # FAIL day out of range | ||
|
||
// FORM: YYYYMMDDTHHMMZ | ||
#RULE DTM 02 \b<YEAR><MM><DD>T<hh><mm><SHORT_TZ>\b | ||
#TEST DTM 02 20101230T1400Z | ||
|
||
// FORM: YYYYMMDDTHHMM ZZZ | ||
#RULE DTM 02a \b<YEAR><MM><DD>T<hh><mm> <LONG_TZ>\b | ||
#TEST DTM 02a 20101230T1400 EST # UTC-0500. parses to 1900 UTC | ||
#TEST DTM 02a 20101230T1400 BNT # UTC+0800. parses to 0600 UTC | ||
#TEST DTM 02a 20101230T1400 XXX # FAIL -- invalid TZ | ||
#TEST DTM 02a 20101230T1400 PST # UTC-0800. parses to UTC | ||
|
||
// FORM: YYYY-MM-DDTHH:MM:SS ... ISO Time. ISO 8601 uses "-", not "/". But should be validated in normalization. | ||
// DTM 04 is collapsed into this pattern. | ||
#RULE DTM 03 \b<YEAR><DSEP1><MM><DSEP2><DD>[T ]<hh>:<mm>:<ss> | ||
#TEST DTM 03 2010-12-30T14:00:01:12 | ||
#TEST DTM 03 2010-12-30T14:00:02 | ||
|
||
// FORM: YYYY-MM-DDTHH:MM ... ISO Time. See 03 above. | ||
#RULE DTM 03b \b<YEAR><DSEP1><MM><DSEP2><DD>[T ]<hh>:<mm> | ||
#TEST DTM 03b 2010-12-30T14:01:11:12 | ||
#TEST DTM 03b 2010-12-30T14:02:12 | ||
#TEST DTM 03b 2010-12/30T14:03:13 # FAIL | ||
|
||
// FORM: MM/DD/YY* HH:MM:SS. | ||
// TODO: 12-hour clock time and detect PM/AM. This HH:MM is only 24 hour clock. | ||
#RULE DTM 05a \b<MONTH><DSEP1><DOM><DSEP2><YEARYY>\s?<hh>:<mm>:<ss> | ||
#TEST DTM 05a 12-30-20 14:00:00:12 # extra data associated with value? | ||
#TEST DTM 05a 12-30-2020 14:00:01 | ||
#TEST DTM 05a 12/30-2020 14:00:02 # FAIL Test mixed punctuation. | ||
#TEST DTM 05a 12/30/2020 14:00:02 # Test mixed punctuation. | ||
#TEST DTM 05a 12/30/20 14:00 # Test mixed punctuation. | ||
|
||
// FORM: MM/DD/YY* HH:MM. | ||
// TODO: 12-hour clock time and detect PM/AM. This HH:MM is only 24 hour clock. | ||
#RULE DTM 05b \b<MONTH><DSEP1><DOM><DSEP2><YEARYY>\s?<hh>:<mm> | ||
#TEST DTM 05b 12-30-20 14:00:00:12 # extra data associated with value? | ||
#TEST DTM 05b 12-30-2020 14:01:01 | ||
#TEST DTM 05b 12/30-2020 14:02:02 # FAIL Test mixed punctuation. | ||
#TEST DTM 05b 12/30/2020 14:03:02 # Test mixed punctuation. | ||
#TEST DTM 05b 12/30/20 14:04 # Test mixed punctuation. | ||
#TEST DTM 05b 12.30.20 14:04 # European convention for Date. | ||
#TEST DTM 05b 2.30.20 14:04 # FAIL No 30 FEB. European convention for Date. Non-zero padded | ||
#TEST DTM 05b 2.30/2020 14:04 # FAIL No 30 FEB., Mixed Separators. European convention for Date. Non-zero padded. | ||
#TEST DTM 05b 4.30.20 14:04 # European convention for Date. Non-zero padded | ||
#TEST DTM 05b 4.30/2020 14:04 # FAIL Mixed Separators. European convention for Date. Non-zero padded. | ||
|
||
|
||
// 8-digit date is great when used in short spans of text; But as a general pattern, this matches any 8 digit number. | ||
// Not very accurate on data or large texts. | ||
// FORM: YYYYMMDD | ||
//#RULE DTM xx \b<YEAR><MM><DD>\b | ||
//#TEST DTM xx 20101230 | ||
|
628 changes: 628 additions & 0 deletions
628
src/main/python/opensextant/resources/geocoord_patterns_py.cfg
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,628 @@ | ||
/** | ||
* NOTICE | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
* | ||
* ************************************************************************** | ||
* NOTICE | ||
* This software was produced for the U. S. Government under Contract No. | ||
* W15P7T-12-C-F600, and is subject to the Rights in Noncommercial Computer | ||
* Software and Noncommercial Computer Software Documentation Clause | ||
* 252.227-7014 (JUN 1995) | ||
* | ||
* (c) 2009-2013 The MITRE Corporation. All Rights Reserved. | ||
* ************************************************************************** | ||
*/ | ||
// HISTORY | ||
// ================= | ||
// 2012-07-19 MCU added range test case for DD-01, DM-01, DMS-01 | ||
// "98 degrees" test: "98 44 009 N, 292 24 03 E FAIL", for example. | ||
// 2012-08-08 MCU removed notion that LONGITUDE is [01][0-7]\d I don't rembember where I got that. | ||
// [01]?\d\d is sufficient and validation of match will be done on all matches. So these patterns can be simplified. | ||
// 0\d\d | ||
// \d{1,3}, etc. | ||
// 2012-10-10 MCU Looking at DMS patterns abmiguities. Order matters. Flexpat 1.1 now preserves order of appearance | ||
// in this file. First rule to fire a match will be considered best match, others with exact span will be duplicates. | ||
// | ||
// 2012-11-26 MCU DM and DMS patterns without hemisphere patterns are now supported. | ||
// Truth data included as a CSV file. See OpenSextant GeocoderEval for source and scripts. | ||
// | ||
// 2013-01-05 MCU Saw reports that had invisible 'guarded area marker char' (U+0096) as separator space between lat/lon | ||
// -- So latlonSep has range of allowable space or control chars U+0080 to U+00A0. | ||
// | ||
// 2019-12-17 MCU Added UTM reference and additional test cases. More needed, to cover range of Lat-band widths | ||
// | ||
// 2022-04-12 MCU Look for mismatched punctuation, again. "DEG: DEG M.s DEG M.s" | ||
// Pattern guidelines: use "\" not "\\" | ||
// To really mean "\\" -- escape a slash use <slash> | ||
// | ||
//Hemispheres | ||
#DEFINE hemiLat [NSns] | ||
#DEFINE hemiLon [EWew] | ||
// "-" as first char is not a range char. | ||
#DEFINE hemiLatSign [-+] | ||
#DEFINE hemiLonSign [-+] | ||
#DEFINE hemiLatPre [-+]|[NSns]\s? | ||
#DEFINE hemiLonPre [-+]|[EWew]\s? | ||
//space and punctuation | ||
#DEFINE slash \\ | ||
#DEFINE degSym [˚°º] | ||
// LAT DM separator -- removed ".", as D.M is a decimal degree pattern. | ||
// To specifically support D.M.S patterns best use a specific pattern for that one case. | ||
// LAT SEPARATORS | ||
#DEFINE dmLatSep \s?DEG|[-˚°º:]|\s | ||
#DEFINE msLatSep [-'´’′:.\s] | ||
//#DEFINE secLatSep ["'\u00B4\u2019\u201D\u2033\u2032\s]{1,2} | ||
|
||
// LON SEPARATORS -- same as LAT | ||
#DEFINE dmLonSep \s?DEG|[-˚°º:]|\s | ||
#DEFINE msLonSep [-'´’′:.\s] | ||
//#DEFINE secLonSep ["'\u00B4\u2019\u201D\u2033\u2032\s]{1,2} | ||
|
||
#DEFINE secLatSep ["'´’”″′\s]{0,2} | ||
#DEFINE secLonSep ["'´’”″′\s]{0,2} | ||
|
||
// #DEFINE latlonSepNoDash [;,/|=x\s\u0080-\u00A0] | ||
// Allows a "-" dash between LAT and LON, where dash is not a hemi sign | ||
// The pattern where a "Dash(-)" is used to separate a Lat/Lon will be its own pattern. | ||
// For example 0156N-4555W uses a dash, but has hemisphere symbol as ordinal. | ||
// +0156 - -4555 is problematic to detect | ||
#DEFINE latlonSep [;,/|=x\s\u0080\u0096\u00A0]? | ||
#DEFINE latlonSep3 [;,/|=x\s\u0080\u0096\u00A0]{1,3} | ||
#DEFINE latlonSep03 [;,/|=x\s\u0080\u0096\u00A0]{0,3} | ||
#DEFINE dmsSep [-.:\s] | ||
|
||
// SPACE: Coordinate patterns can break across lines ~ HTML, word docs, word wrapping, etc. | ||
// RULE: Allow no more than 3 whitespace chars in the general case; In some cases optional space may be worth testing | ||
//#DEFINE space \\s{1,3} | ||
//#DEFINE optSpace \\s{0,3} | ||
|
||
// Parts of a decimal degree Latitude/Longitude | ||
#DEFINE decDegLat \d?\d\.\d{1,20} | ||
#DEFINE decDegLon [0-1]?\d?\d\.\d{1,20} | ||
|
||
// degree decimal minute lat/lon; either: | ||
// MM.m..., | ||
// M.m..., | ||
// M-mm is MIN-SEC, not MIN.DECMIN | ||
// MM-mmm... | ||
// M-mmm... | ||
// NOT M-mm or MM-mm which implies Min/Sec, not decimal | ||
#DEFINE decMinLat [0-5]?\d\.\d{1,20} | ||
#DEFINE decMinLat3 [0-5]?\d[-.]\d{3,20} | ||
#DEFINE decMinLon [0-5]?\d\.\d{1,20} | ||
#DEFINE decMinLon3 [0-5]?\d[-.]\d{3,20} | ||
|
||
// Difference between DMS fixed-width and variable lenght fields could be accomplished by these greedy matchers | ||
// \d{1,2} or \d?, for example. | ||
// if 2 digits exist, then \d{1,2} will consume both, before matching next item in pattern. | ||
// Fear: too many variable or optional items will lead to a lot of false positives. | ||
// Later if this framework proves to work we could consolidate the variable length and fixed length patterns back to a common pattern. | ||
// For now trying to make this more accurate and explicit. | ||
|
||
// Parts of a DMS latitude/Longitude | ||
#DEFINE degLat \d{1,2} | ||
#DEFINE degLon [01]?\d?\d | ||
|
||
//Parts of a Degrees Minutes Seconds fractional seconds | ||
#DEFINE fractMinLat [-\.]\d{1,6} | ||
#DEFINE fractMinLon [-\.]\d{1,6} | ||
#DEFINE fractSecLat \.\d{1,6} | ||
#DEFINE fractSecLon \.\d{1,6} | ||
#DEFINE fractSecLatOpt \.?\d{1,6} | ||
#DEFINE fractSecLonOpt \.?\d{1,6} | ||
#DEFINE fractMinLat3 \d{3,6} | ||
#DEFINE fractMinLon3 \d{3,6} | ||
|
||
// Fixed-length patterns | ||
#DEFINE dmsDegLat [0-8]\d | ||
#DEFINE dmsDegLon [01]\d\d | ||
#DEFINE dmsMinLat [0-5]\d | ||
#DEFINE dmsMinLon [0-5]\d | ||
#DEFINE dmsSecLat [0-5]\d | ||
#DEFINE dmsSecLon [0-5]\d | ||
|
||
#DEFINE minLat [0-5]?\d | ||
#DEFINE secLat [0-5]?\d | ||
#DEFINE minLon [0-5]?\d | ||
#DEFINE secLon [0-5]?\d | ||
|
||
#DEFINE latMarker Lat\w*[:=]? | ||
#DEFINE lonMarker Lon\w*[:=]? | ||
|
||
// Parts of an MGRS and UTM | ||
#DEFINE UTMBand [A-HJ-NP-Z] | ||
#DEFINE UTMZone [0-5]?\d | ||
#DEFINE MGRSQuad [A-HJ-NP-Z][A-HJ-NP-V] | ||
#DEFINE MGRSZone [0-6]?\d\s?[C-HJ-NP-X] | ||
|
||
// http://en.wikipedia.org/wiki/Universal_Transverse_Mercator | ||
// UTM Easting is from 167,000 to 833,000 m | ||
// Northing is from 0 to 10,000,000 m ( <= 9,999,999m) | ||
#DEFINE UTMEasting \d{6} | ||
#DEFINE UTMNorthing \d{3,7} | ||
|
||
// MGRS precision is 1m. Quad is 100,000m sq so resolution is 5 digits + 5 digits with optional whitespace | ||
// 99999n 99999e -- in MGRS we never see "m" units or N/E denoted explicitly | ||
// Occassionally, newlines or whitespace are interspersed in offset | ||
// minimal: | ||
// dd | ||
// ddddd ddddd with an additional one or two white spaces. The offsets start and end with numbers. Only whitespace between is optional. | ||
// ddddd dddddd additional digit in Easting -- trailing 6th digit is a typo; trim off | ||
// dddddd ddddd additional digit in Northing -- trailing 6th digit is a typo; trim off | ||
// ddddddddddd Typo introduces ambiguity -- only correct thing is to split on halfway point +/- 1 digit and emit two answers | ||
// | ||
// "precise" pattern is too precise. | ||
#DEFINE Easting_Northing_Precise \d{2,5}\s?\d{2,5} | ||
#DEFINE Easting_Northing \d[\s\d]{0,10}\d | ||
|
||
|
||
//---------------------------RULES------------------------------------------------ | ||
|
||
# #RULE FAM ENUM PATTERN | PATTERN-WITH-GROUPS | ||
# #TEST FAM ENUM TEXT-WITH-COORD | ||
# #NEGATIVE FAM ENUM TEXT-WITH-INVALID-COORD --- UNUSED SO FAR. | ||
|
||
//.......................................... | ||
// MGRS | ||
//.......................................... | ||
// FORM: ZZZQQ ee* nn* | ||
// Z Q E N Zone (2-3 char), Quad (2 char), Easting (1-5 digits) Northing(1-5 digits) | ||
// http://en.wikipedia.org/wiki/Military_grid_reference_system | ||
// http://www.armystudyguide.com/content/Prep_For_Basic_Training/Prep_for_basic_land_navigation/locate-a-point-using-the-.shtml | ||
#CLASS MGRS opensextant.extractors.xcoord.MGRSMatch | ||
#RULE MGRS 01 \b<MGRSZone>\s?<MGRSQuad>\s{0,2}<Easting_Northing>\b | ||
#TEST MGRS 01 4QFJ 50007000 | ||
#TEST MGRS 01 38SMB4611036560 | ||
#TEST MGRS 01 38SMB46113656 | ||
#TEST MGRS 01 8SMB46113656 | ||
#TEST MGRS 01 38SMB 4611036560 | ||
#TEST MGRS 01 38SMB 46113656 | ||
#TEST MGRS 01 38SMB 461136560 # asymmetric | ||
#TEST MGRS 01 38SMB 46110 3656 | ||
#TEST MGRS 01 38SMB 4611 03656 | ||
#TEST MGRS 01 38SMB 4611 03656 60 | ||
#TEST MGRS 01 38SMB 4611 03656 4 km ## random numeric text following a northing. | ||
#TEST MGRS 01 38SMB 461 03656 4 km ## random numeric text following a northing. | ||
#TEST MGRS 01 38SMB 0461 0365 | ||
#TEST MGRS 01 38SMB 00461 00365 | ||
#TEST MGRS 01 38SMB 04610 03650 | ||
#TEST MGRS 01 38SMB 04610 0365 | ||
#TEST MGRS 01 38SMB 46110365 60 | ||
#TEST MGRS 01 38SMB 46110365 60 | ||
#TEST MGRS 01 38SMB 46110365 60000 # supposedly a large number follows -- pattern should not match. | ||
#TEST MGRS 01 38 SMB 4611 3656 | ||
#TEST MGRS 01 42 RPR 4611 3656 FAIL 42 RP* P not valid lat band in 42 longitudinal zone | ||
#TEST MGRS 01 42WDB 4611 3656 FAIL, D not a valid zone | ||
#TEST MGRS 01 63SNV 6567 7888 FAIL | ||
#TEST MGRS 01 00SNV 6567 7888 FAIL | ||
#TEST MGRS 01 0SNV 6567 7888 FAIL | ||
#TEST MGRS 01 37SCA211914648 FAIL Mismatched Northing/Easting. Disable XCoord MGRS_STRICT to rescue coordinate. | ||
# MGRS date patterns -- sometimes dates are valid MGRS other times they match the pattern, but are invalid MGRS locations. | ||
#TEST MGRS 01 20 PER 1000 FAIL - appears to be a rate of some sort. | ||
#TEST MGRS 01 10 JAN 1994 FAIL - appears to be a date of some sort. | ||
#TEST MGRS 01 10 JAN 94 FAIL - appears to be a date of some sort. | ||
#TEST MGRS 01 1 JAN 94 FAIL - appears to be a date of some sort. | ||
#TEST MGRS 01 01 JAN 94 FAIL - appears to be a date of some sort. past century | ||
#TEST MGRS 01 10 JAN 13 FAIL - appears to be a date of some sort. | ||
#TEST MGRS 01 1 JAN 13 FAIL - appears to be a date of some sort. | ||
#TEST MGRS 01 4 JUL 2008 FAIL - appears to be a date of some sort; and also invalid MGRS | ||
#TEST MGRS 01 01 JAN 13 FAIL - appears to be a date of some sort. current year, 2013 | ||
#TEST MGRS 01 14 JAN 00 FAIL - appears to be a date of some sort. year 2000 | ||
#TEST MGRS 01 14 JAN 30 FAIL - appears to be a date of some sort. year 1930? | ||
#TEST MGRS 01 14 EST 2030 FAIL - appears to be a date of some sort. year 2030? | ||
#TEST MGRS 01 14 EST 2008 FAIL - appears to be a date of some sort | ||
#TEST MGRS 01 14 EDT 2008 FAIL - appears to be a date of some sort | ||
#TEST MGRS 01 61 EDT 2008 FAIL - appears to be a date of some sort | ||
#TEST MGRS 01 7JAN13 1500 FAIL - obvious date, Jan. 7th, 2013, 3PM | ||
#TEST MGRS 01 17MAR921100 TEST is a date, but should pass if no MGRS filters are on. | ||
#TEST MGRS 01 38smb 00461 00365 FAIL - lower case MGRS grids should not match by default; it is so rare. | ||
|
||
//.......................................... | ||
// UTM | ||
//.......................................... | ||
// REFERENCE: https://en.wikipedia.org/wiki/Universal_Transverse_Mercator_coordinate_system | ||
// FORM: (?# ZZA DDDDDD DDDDDDD ) | ||
#CLASS UTM opensextant.extractors.xcoord.UTMMatch | ||
#RULE UTM 01 \b<UTMZone>\s?<UTMBand>\s?<UTMEasting>[mE]{0,2}\s?<UTMNorthing>[mN]{0,2}\b | ||
#TEST UTM 01 17N 699990 3333335 | ||
#TEST UTM 01 17S 699990 3333335 | ||
#TEST UTM 01 17N 630084 4833438 # Should be CN tower, as mentioned in wikipedia above. | ||
#TEST UTM 01 17T 699990 3333335 | ||
#TEST UTM 01 7S 699990 3333335 | ||
#TEST UTM 01 17N6999903333335 | ||
#TEST UTM 01 17 6999903333335 # FAIL ambiguous | ||
#TEST UTM 01 17N 699990m 3333335m | ||
#TEST UTM 01 17S 699990mE 3333335mN | ||
#TEST UTM 01 17N 699990Em 3333335Nm | ||
#TEST UTM 01 17X 699990e 3333335n Pass, but questionable lat band may contradict Northing; Northern Hemi, X lat | ||
#TEST UTM 01 17D 699990E 3333335N Ditto, Southern Hemi = D lat | ||
|
||
|
||
//.......................................... | ||
// Degrees Decimal minutes | ||
//.......................................... | ||
// TODO: Clean up all test cases below for DM and DMS | ||
// | ||
// 00 DD MMH-mmm DDD MMH-mmm | ||
// 01a DD MM H DDD MM H | ||
// 01a DD MM.mmm H DDD MM.mmm H | ||
// 01a DD DEG MM.m H DDD DEG MM.m H | ||
// 01b H DD MM H DDD MM | ||
// 01b H DD MM.mmm H DDD MM.mmm | ||
// | ||
// Variables: "x" or "BY" are equivalent separators to ";/,|(sp)" | ||
// Deg symbol, ', ", or '' are all optional, but if present, they suggest pattern is a coordinate, not just a pair of numbers. | ||
// Lon DDD is 1-3 numbers, Lat DD is 1-3 numbers, leading '0' optional. ' | ||
|
||
// Pattern logic for dealing with factional minutes; | ||
// work-in progress: | ||
// has separators? | ||
// YES => is sepator a "-" dash throughout? | ||
// YES => if fractMin length is 2 chars, then this is DMS, not DM.m | ||
// NO => is factMin prefixed with "-" and | ||
// | ||
// | ||
|
||
// FORM: DDMMH-mmm | ||
#CLASS DM opensextant.extractors.xcoord.DegMinMatch | ||
|
||
#RULE DM 00 \b<dmsDegLat>[-\s]?<dmsMinLat><hemiLat><fractMinLat><latlonSep><dmsDegLon>[-\s]?<dmsMinLon><hemiLon><fractMinLon> | ||
#TEST DM 00 4218N-009 10224W-003 | ||
#TEST DM 00 4218N.009 10224W.003 | ||
#TEST DM 00 42-18N-009 102-24W-003 | ||
#TEST DM 00 42 18N-009 102 24W-003 | ||
#TEST DM 00 42 18N-009102 24W-003 FAIL same pattern test as above, but no separator. | ||
#TEST DM 00 78 44N-009, 092 24N-003 FAIL, both hemispheres are N | ||
#TEST DM 00 98 44N-009, 292 24W-003 FAIL, XY out of range | ||
|
||
// VARIABLE LENGTH FIELDS | ||
// FORM: DDMMmmmHDDDMMmmmH | ||
#RULE DM 01a \b<degLat><dmLatSep>\s?<minLat><fractMinLat>?<msLatSep>?\s?<hemiLat><latlonSep3><degLon><dmLonSep>\s?<minLon><fractMinLon>?<msLonSep>?\s?<hemiLon> | ||
#TEST DM 01a 42 18.009N 102 24.003W | ||
#TEST DM 01a 42 18.00 N 102 24.00W | ||
#TEST DM 01a 42 18-00 N 102 24-00W | ||
#TEST DM 01a 42-18-09N; 102-24-03W # FAIL Ambiguous, but good test. This is valid DMS only. | ||
#TEST DM 01a 42 9-00 N 102 6-00W This pattern needs to be more explicit -- its own pattern?--- MM-mm used to represent MM.mm fractional minutes is tough, as it is just as easily MM-SS min/seconds; However MM-mmm is more obvious as fractional minutes. | ||
#TEST DM 01a 42 9.009 N 102 6.003W | ||
#TEST DM 01a 42 18-009N 102 24-003W | ||
#TEST DM 01a 42.18.009N 102.24.003W | ||
#TEST DM 01a 42-18-009N; 102-24-003W | ||
#TEST DM 01a 42° 18.009'N; 102° 24.003'W | ||
#TEST DM 01a 42° 18.009N; 102° 24.003W | ||
#TEST DM 01a 42°18.009N; 102°24.003W | ||
#TEST DM 01a 12° 18.009N; 2° 24.003W | ||
#TEST DM 01a 42-18-09N; 102-24-03W # FAIL: DMS?, MM-SS is not decimal Min MM.SS it is Min Sec | ||
#TEST DM 01a 42-18-009N; 102-24-003W # Clearly Decimal, as third slot is 3 digits, so it seems 18.009 Min, rather than 18min 009sec | ||
#TEST DM 01a 42.18.009N 102.24.003W | ||
#TEST DM 01a 3: 42.18N 102.24W # FAIL, avoid the 3: and let DD pattern pick this up. | ||
#TEST DM 01a 42.18.009N x 102.24.003W | ||
#TEST DM 01a 42.18.9999 N 102.24.5555 W | ||
#TEST DM 01a 42°18'N 102°24'W | ||
#TEST DM 01a 42° 18'N 102° 24'W | ||
#TEST DM 01a 42 DEG 18.0N 102 DEG 24.0W | ||
#TEST DM 01a 01DEG 44 N 101DEG 44 E | ||
#TEST DM 01a 42:18n 102:24w | ||
#TEST DM 01a 42:18n - 102:24w | ||
#TEST DM 01a 42:00N-102:18E | ||
|
||
|
||
// VARIABLE LENGTH FIELDS | ||
// FORM: DD MM.mmm H | ||
// lat/lon separator -- cannot use "-" (latlonSep) as it is ambiguous if optional sep with optional preceding hemisphere which one is which. | ||
// | ||
#RULE DM 01b \b<hemiLatPre>\s?<degLat><dmLatSep>\s?<minLat><fractMinLat>?<msLatSep>?<latlonSep3><hemiLonPre>\s?<degLon><dmLonSep>\s?<minLon><fractMinLon>?<msLonSep>? | ||
#TEST DM 01b N42 18.00 W102 24.00 | ||
#TEST DM 01b N 42 18.00 W 102 24.00 | ||
#TEST DM 01b N42 18-00 W102 24-00 | ||
#TEST DM 01b N42 18-00 x W102 24-00 | ||
#TEST DM 01b N 4218.0 W 10224.0 | ||
#TEST DM 01b N42.18.005 x W102.24.008 | ||
#TEST DM 01b N42 18-005 x W102 24-008 | ||
#TEST DM 01b +42° 18 -102° 24 | ||
#TEST DM 01b N42 18' W102 24' | ||
#TEST DM 01b +42 18 -102 24 # FAIL | ||
#TEST DM 01b N42.18.005 x W102.24.008 | ||
#TEST DM 01b N42:18 W102:24 | ||
#TEST DM 01b n42:18 w102:24 | ||
#TEST DM 01b N 01° 44' E 101° 22' | ||
#TEST DM 01b N 01° 44' - E 101° 22' | ||
#TEST DM 01b N 01° 44' - 101° 22' Could possibly not PASS, as lat hemisphere is North; ambiguous if lon hemi is West or if "-" dash | ||
#TEST DM 01b 01° 44' - -101° 22' A really obscure use of dash, next to a hemisphere sign. TESTING Only | ||
#TEST DM 01b -1 12-00-32 FAIL: lopsided fields and/or precision. Use of "-" separator is inconsistent. | ||
#TEST DM 01b -1-45 -12-00-32 FAIL | ||
|
||
|
||
// COPY OF 01a/b to allow for fixed-width min, sec, but w/out delimiters. | ||
// FORM: DDMM.m*H | ||
#RULE DM 02a \b<dmsDegLat><minLat><fractMinLat>\s?<hemiLat><latlonSep03><dmsDegLon><minLon><fractMinLon>\s?<hemiLon>\b | ||
#TEST DM 02a 4218.009N 10224.003W | ||
#TEST DM 02a 4209.009N 10206.003W | ||
#TEST DM 02a 4209.00N 10206.00W | ||
#TEST DM 02a 4209.0N 10206.0W | ||
#TEST DM 02a 4209.0N - 10206.0W #FAIL dash not allowed as separator currently. | ||
#TEST DM 02a 4209.0N x 10206.0W | ||
|
||
// FORM: HDDMM.m* | ||
#RULE DM 02b \b<hemiLat>\s?<dmsDegLat><dmsMinLat><fractMinLat><latlonSep03><hemiLon>\s?<dmsDegLon><dmsMinLon><fractMinLon>\b | ||
#TEST DM 02b N4218.0 W10224.0 | ||
#TEST DM 02b N4218.009W10224.003 | ||
#TEST DM 02b N4218-0018 W10224-0444 | ||
|
||
// Desc: FIXED, NO DELIM, FRACTIONAL MINUTES -- 03a/b | ||
#RULE DM 03a \b<dmsDegLat><dmsMinLat><fractMinLat3>\s?<hemiLat><latlonSep><dmsDegLon><dmsMinLon><fractMinLon3>\s?<hemiLon>\b | ||
#TEST DM 03a 4218009N10224003W | ||
#TEST DM 03a 4218009N;10224003W | ||
#TEST DM 03a 4218009N 10224003W | ||
|
||
#RULE DM 03b \b<hemiLat>\s?<dmsDegLat><dmsMinLat><fractMinLat3><latlonSep><hemiLon>\s?<dmsDegLon><dmsMinLon><fractMinLon3>\b | ||
#TEST DM 03b N4218009W10224003 | ||
#TEST DM 03b N4218009;W10224003 | ||
#TEST DM 03b N4218009 W10224003 | ||
|
||
|
||
// Finds DM or DM.m degree patterns that have no hemisphere. | ||
// TODO: Revisit why optional hemisphere poses a problem. For now we assume the lat/lon separator is not a dash | ||
// | ||
#RULE DM 03-deg \b<hemiLatSign>?<degLat><degSym>\s*<minLat><fractMinLat>?<msLatSep>?<latlonSep3><hemiLonSign>?<degLon><degSym>\s*<minLon><fractMinLon>?<msLonSep>?\b | ||
#TEST DM 03-deg 42° 18, 102° 24 | ||
#TEST DM 03-deg 42° 18' 102° 24' | ||
#TEST DM 03-deg 42°18 x 102° 24 | ||
#TEST DM 03-deg 42˚ 18.44, 102˚ 24.11 # Vary deg symbol | ||
#TEST DM 03-deg 42° 18.44' 102° 24.11' | ||
#TEST DM 03-deg 42°18.44 x 102° 24.11 | ||
#TEST DM 03-deg (+42°18.44 x 102° 24.11) | ||
#TEST DM 03-deg (+42°18.44 x +102° 24.11) | ||
#TEST DM 03-deg (42°18.44 x -102° 24.11) | ||
|
||
|
||
// desc: FIXED WIDTH FIELDS, NO SEP | ||
#RULE DM 04a \b<hemiLat><dmsDegLat><dmsMinLat><latlonSep><hemiLon><dmsDegLon><dmsMinLon> | ||
#TEST DM 04a N4218 W10224 | ||
|
||
// FORM: DDMMH DDMMH | ||
#RULE DM 04b \b<dmsDegLat><dmsMinLat>\s*<hemiLat><latlonSep><dmsDegLon><dmsMinLon>\s*<hemiLon> | ||
#TEST DM 04b 4218N 10224W | ||
|
||
// RANDOM PATTERNS: | ||
// FORM: /DDMMHd/DDDMMHd/ where d=checksum value | ||
#RULE DM 05 /<dmsDegLat><dmsMinLat><hemiLat>\d/<dmsDegLon><dmsMinLon><hemiLon>\d/ | ||
#TEST DM 05 /4218N4/10224W5/ | ||
|
||
#DEFINE xySep \s?[Xx/,]\s? | ||
|
||
// DM with minimal symbols. Required LAT/LON separator. Otherwise this is a list of numbers with +/- signs. | ||
// FORM: +/-DD MM.mmm x +/-DDD DD.DDD | ||
#RULE DM 06 <hemiLatSign><degLat>\s<decMinLat><xySep><hemiLonSign><degLon>\s<decMinLon> | ||
#TEST DM 06 +42 18.0 x -102 24.0 | ||
#TEST DM 06 +42 18.0 X -102 24.0 | ||
#TEST DM 06 +42 18.0 / -102 24.0 | ||
#TEST DM 06 +42 18.0, -102 24.0 | ||
|
||
// DOT patterns -- We are for now assuming | ||
// | ||
// DD.MM.SS DDD.MM.SS is DMS | ||
// DD.MM.mmm* DDD.MM.mmm* is DM | ||
// | ||
// and Longitude in these patterns is 3-digits | ||
// | ||
#RULE DM 01a-dot \b<dmsDegLat>\.<dmsMinLat>\.<fractMinLat3>\s?<hemiLat><latlonSep>*<dmsDegLon>\.<dmsMinLon>\.<fractMinLon3>\s?<hemiLon> | ||
#TEST DM 01a-dot 03.44.777N 111.10.044W | ||
#TEST DM 01a-dot 03.44.777 N 111.10.044 W | ||
#TEST DM 01a-dot 03.44.7778 N 111.10.0442 W | ||
|
||
#RULE DM 01b-dot \b<hemiLat>\s?<dmsDegLat>\.<dmsMinLat>\.<fractMinLat3><latlonSep>*<hemiLon>\s?<dmsDegLon>\.<dmsMinLon>\.<fractMinLon3> | ||
#TEST DM 01b-dot N03.44.777 W111.10.044 | ||
#TEST DM 01a-dot N 03.44.777 W 111.10.044 | ||
#TEST DM 01a-dot N 03.44.7778 W 111.10.0442 | ||
|
||
|
||
#DEFINE trivialSep \s{0,2} | ||
|
||
//.......................................... | ||
// Degrees Minutes Seconds | ||
//.......................................... | ||
// 01a DD MM SS H DDD MM SS H | ||
// 01b H DD MM SS H DDD MM SS | ||
#CLASS DMS opensextant.extractors.xcoord.DegMinSecMatch | ||
#RULE DMS 01a <latMarker>?\s*<degLat><dmLatSep>\s?<minLat><msLatSep>\s?<secLat><fractSecLat>?<secLatSep><trivialSep><hemiLat>\s?<latlonSep3><lonMarker>?\s*<degLon><dmLonSep>\s?<minLon><msLonSep>\s?<secLon><fractSecLon>?<secLonSep>\s?<hemiLon> | ||
// Beware unicode | ||
#TEST DMS 01a 42 18' 00N 102 24' 00W | ||
#TEST DMS 01a 42 18' 00”N 102 24' 00”W | ||
#TEST DMS 01a 2 8' 00"N 12 24' 00"W | ||
#TEST DMS 01a 42° 18' 00"N 102° 24' 00"W | ||
#TEST DMS 01a 42 18-00 N 102 24-00W | ||
#TEST DMS 01a 42 9-00 N 102 6-00W | ||
#TEST DMS 01a 34°22′24″N 49°14′27″E | ||
#TEST DMS 01a 34°22 24″N 49°14 27″E | ||
#TEST DMS 01a 34°22′24N 49°14′27E | ||
#TEST DMS 01a 34° 22′ 24″N 49° 14′ 27″E | ||
#TEST DMS 01a 34° 22' 24"N 49° 14' 27"E | ||
#TEST DMS 01a 34°22'24"N 49°14'27"E | ||
#TEST DMS 01a 34°22'24"N 49°14'27"E | ||
#TEST DMS 01a 34 22'24"N 49 14'27"E | ||
#TEST DMS 01a 34 22' 24"N 049 14' 27"E | ||
#TEST DMS 01a 34 22 24N 049 14 27E | ||
#TEST DMS 01a 34 22 24 N 049 14 27 E | ||
#TEST DMS 01a 34 22 24N 049 14 27E | ||
#TEST DMS 01a 34 22 24 N 49 14 27 E | ||
#TEST DMS 01a 98 44 009 N, 292 24 03 E FAIL | ||
#TEST DMS 01a 27:37:45N/82:42:10W // original | ||
#TEST DMS 01a 27 37' 45"N/82 42' 10"W // original | ||
#TEST DMS 01a "27° 37' 45’’N, 82° 42' 10’’W", // original | ||
#TEST DMS 01a "27° 37' 45’N, 82° 42' 10’W", // single second hash sym | ||
#TEST DMS 01a "27° 37' 45’’N 82° 42' 10’’W", // no lat/lon sep | ||
#TEST DMS 01a "27° 37 45N, 82° 42 10W" // no min hash. | ||
#TEST DMS 01a 01°44'55.5"N 101°22'33.0"E | ||
#TEST DMS 01a 01° 44' 55.5"N 101° 22' 33.0"E | ||
#TEST DMS 01a 01° 44' 55.5"N 101° 22' 33"E // asymmetric in decimal seconds precision | ||
#TEST DMS 01a 42 18' 00" -102 24' 00" | ||
#TEST DMS 01a GEO:(42° 18' 00" 102° 24' 00") | ||
#TEST DMS 01a GEO:42° 18' 00" 102° 24' 00" | ||
#TEST DMS 01a 03-04-05 12-11-10 Ambiguous date or coordinate -- very possibly neither. | ||
#TEST DMS 01a 03-04-05 12:11:10 FAIL - is a date/time pattern | ||
#TEST DMS 01a 99-04-05 12:11:10 FAIL - is a date/time pattern - part of 1999-04... | ||
#TEST DMS 01a 03-04-58 12:11:10 FAIL - is a date/time pattern - DOB 1958-03-04 | ||
#TEST DMS 01a 03-04-99 12:11:10 FAIL - is a date/time pattern - DOB 1958-03-04 | ||
#TEST DMS 01a 14-04-33 12:11:10 FAIL - is a date/time pattern | ||
#TEST DMS 01a Latitude: 41º58'46"N, Longitude: 87º54'20"W | ||
#TEST DMS 01a location 28˚22′24″N 46˚14′27″E | ||
|
||
|
||
// DMS 01 with hemisphere leading | ||
#RULE DMS 01b \b<hemiLatPre><degLat><dmLatSep>\s?<minLat><msLatSep>\s?<secLat><fractSecLat>?<secLatSep><latlonSep3><hemiLonPre><degLon><dmLonSep>\s?<minLon><msLonSep>\s?<secLon><fractSecLon>?<secLonSep>\b | ||
#TEST DMS 01b N01°44'55.5" E101°22'33.0" | ||
#TEST DMS 01b N01° 44' 55.5" E101° 22' 33.0" | ||
#TEST DMS 01b N 01° 44' 55.5" E 101° 22' 33.0" | ||
#TEST DMS 01b N42 18' 00" W102 24' 00" | ||
#TEST DMS 01b N02 8' 00" W012 24' 00" | ||
#TEST DMS 01b N42° 18' 00" W102° 24' 00" | ||
#TEST DMS 01b +42 18' 00" -102 24' 00" | ||
#TEST DMS 01b +42: 18' 00" -102 24' 00" # FAIL hmm. a coordinate pattern, but a test of mismatch punct | ||
#TEST DMS 01b 7: 12 22.33N 14 43.42W # PASS valid coordinate, but ignore the prefix 7: | ||
#TEST DMS 01b xx +42° 18' 00" -102° 24' 00" xx | ||
|
||
// DMS variation using ":" only. | ||
// Only if hemisphere sign is present will this match. | ||
#RULE DMS 01c \b<hemiLatSign><degLat>:<minLat>:<secLat>\s?<hemiLonSign><degLon>:<minLon>:<secLon>\b | ||
#TEST DMS 01c x +42:18:00 -102:24:00 x | ||
|
||
// DMS01 with fractional seconds, but no hemisphere symbol, just degree | ||
#RULE DMS 01-deg \b<hemiLatSign>?<degLat><degSym>\s?<minLat><msLatSep>\s?<secLat><fractSecLat>?<secLatSep><latlonSep3><hemiLonSign>?<degLon><degSym>\s?<minLon><msLonSep>\s?<secLon><fractSecLon>?<secLonSep>\b | ||
#TEST DMS 01-deg 01°44'55.5" 101°22'33.0" | ||
#TEST DMS 01-deg 01° 44' 55.5" 101° 22' 33.0" | ||
#TEST DMS 01-deg 01° 44' 55.5" 101° 22' 33" | ||
#TEST DMS 01-deg 01° 44' 55" 101° 22' 33.0" | ||
#TEST DMS 01-deg 01° 44' 55" -101° 22' 33.0" | ||
|
||
#RULE DMS 01a-dot \b<dmsDegLat>\.<dmsMinLat>\.<dmsSecLat>\s?<hemiLat><latlonSep><dmsDegLon>\.<dmsMinLon>\.<dmsSecLon>\s?<hemiLon> | ||
#TEST DMS 01a-dot 03.44.777N 111.10.044W | ||
#TEST DMS 01a-dot 03.44.777 N 111.10.044 W | ||
#TEST DMS 01a-dot 03.44.7778 N 111.10.0442 W | ||
|
||
#RULE DMS 01b-dot \b<hemiLat>\s?<dmsDegLat>\.<dmsMinLat>\.<dmsSecLat><latlonSep><hemiLon>\s?<dmsDegLon>\.<dmsMinLon>\.<dmsSecLon> | ||
#TEST DMS 01b-dot N03.44.777 W111.10.044 | ||
#TEST DMS 01a-dot N 03.44.777 W 111.10.044 | ||
#TEST DMS 01a-dot N 03.44.7778 W 111.10.0442 | ||
|
||
// DMS 01 with dot as a separator. | ||
// | ||
#RULE DMS 02a \b<degLat>\.<minLat>\.<secLat><secLatSep><hemiLat><latlonSep><degLon>\.<minLon>\.<secLon><secLonSep><hemiLon> | ||
#TEST DMS 02a 01.44.55N 055.44.33E | ||
|
||
#RULE DMS 02b \b<hemiLat><degLat>\.<minLat>\.<secLat><secLatSep><latlonSep><hemiLon><degLon>\.<minLon>\.<secLon><secLonSep> | ||
#TEST DMS 02b N01.44.55 E055.44.33 | ||
|
||
|
||
// DMS with NO SEPARATORS | ||
#RULE DMS 03a \b<dmsDegLat><dmsMinLat><dmsSecLat>\s?<hemiLat><latlonSep>*<dmsDegLon><dmsMinLon><dmsSecLon>\s?<hemiLon> | ||
#TEST DMS 03a 421800N 1022400W | ||
#TEST DMS 03a 5113N 00425E FAIL | ||
|
||
#RULE DMS 03b \b<hemiLatPre><dmsDegLat><dmsMinLat><dmsSecLat><latlonSep><hemiLonPre><dmsDegLon><dmsMinLon><dmsSecLon> | ||
#TEST DMS 03b N421800 W1022400 | ||
#TEST DMS 03b N421800W1022400 | ||
#TEST DMS 03b N421800/W1022400 | ||
|
||
// DMS NO SEPARATORS, decimal seconds. Ambiguous -- is additional precision in Fractional Minutes or fractional seconds? | ||
#RULE DMS 04a \b<dmsDegLat><dmsMinLat><dmsSecLat><fractSecLatOpt><hemiLat><latlonSep03><dmsDegLon><dmsMinLon><dmsSecLon><fractSecLonOpt><hemiLon> | ||
#TEST DMS 04a 4218001234N 10224001234W | ||
#TEST DMS 04a 4218001234N - 10224001234W | ||
#TEST DMS 04a 4218001234N x 10224001234W | ||
|
||
#RULE DMS 04b \b<hemiLat><dmsDegLat><dmsMinLat><dmsSecLat><fractSecLatOpt><latlonSep03><hemiLon><dmsDegLon><dmsMinLon><dmsSecLon><fractSecLonOpt> | ||
#TEST DMS 04b N4218001234 W10224001234 | ||
#TEST DMS 04b N4218001234 W10224001234 | ||
|
||
|
||
|
||
|
||
//.......................................... | ||
// Decimal Degrees | ||
//.......................................... | ||
#CLASS DD opensextant.extractors.xcoord.DecimalDegMatch | ||
|
||
// FORM: DD-xx, Decimal Deg, Preceding Hemisphere (a) H DD.DDDDDD° HDDD.DDDDDD°, optional deg symbol | ||
#RULE DD 01 \b<hemiLatPre>\s?<decDegLat><degSym>?<latlonSep3><hemiLonPre>\s?<decDegLon><degSym>? | ||
#TEST DD 01 N42.3, W102.4 | ||
#TEST DD 01 N 42.3, W 102.4 | ||
#TEST DD 01 N42.3°, W102.4° | ||
#TEST DD 01 N98.3°, W192.4° #FAIL test out lon deg validation. | ||
#TEST DD 01 N98.3°, W292.4° #FAIL test out lon deg validation. | ||
|
||
// FORM: DD-xx, Decimal Deg, Postpending Hemisphere (a) DD.DDDDDD°H DDD.DDDDDD°H, optional deg symbol | ||
#RULE DD 02 \b<decDegLat><degSym>?<hemiLat><latlonSep3><decDegLon><degSym>?<hemiLon>\b | ||
#TEST DD 02 42.3N; 102.4W | ||
#TEST DD 02 42.3°N; 102.4°W | ||
#TEST DD 02 3: 42.18N 102.24W # PASS, avoid the 3: | ||
|
||
// FORM: DD-xx, Decimal Deg, Preceding Hemisphere, +/- with deg symbol, (a) {-|+|}DD.DDDDDD° {-|+|}DDD.DDDDDD° | ||
#RULE DD 03 \b<hemiLatSign>?<decDegLat><degSym><latlonSep3><hemiLonSign>?<decDegLon><degSym> | ||
#TEST DD 03 +42.3°;-102.4° | ||
#TEST DD 03 42.3° x -102.4° | ||
#TEST DD 03 42.3° x 102.4° | ||
|
||
// FORM: DD-xx, Decimal Deg with explicit LAT: LON: markers, both hemisphere and degree symbols are optional | ||
#RULE DD 04 <latMarker>\s*<hemiLatPre>?<decDegLat><degSym>?<hemiLat>?<latlonSep3><lonMarker>\s*<hemiLonPre>?<decDegLon><degSym>?<hemiLon>? | ||
#TEST DD 04 Latitude: 42.3N x Longitude: 102.3W | ||
#TEST DD 04 Latitude: N42.3° x Longitude: W102.3° | ||
#TEST DD 04 Latitude: 42.3 Longitude: 102.3 | ||
#TEST DD 04 Latitude= -42.3 Longitude= 102.3 | ||
#TEST DD 04 Latitude= 42.3 Longitude= 102.3 | ||
#TEST DD 04 Latitude: 42.3 Longitude: 102.3 | ||
#TEST DD 04 Latitude -42.3 Longitude 102.3 | ||
#TEST DD 04 Lat:-42.3 Lon:102.3 | ||
#TEST DD 04 Lat= -42.3 Long= 102.3 | ||
|
||
// #RULE DD 04a \w+[:=]\s*<hemiLatPre>?<decDegLat><latlonSep3><hemiLonPre>?<decDegLon> | ||
// #TEST DD 04a COORD=-55.6, +134.700 | ||
|
||
//.......................................... | ||
// Degrees -- These patterns are so low-resolution, that for now it seems useless to allow these to run by default. | ||
// Its approximately 100 KM x 100 KM | ||
//.......................................... | ||
|
||
// FORM: DD-xx, Decimal Deg, Preceding Hemisphere (a) HDD° HDDD°, required deg symbol | ||
#RULE DD 05 \b<hemiLatPre>?<degLat><degSym><latlonSep3><hemiLonPre>?<degLon><degSym> | ||
#TEST DD 05 N42°, W102° | ||
#TEST DD 05 N42W102 #FAIL -- hemispheres are right, but text is too short and lack of separator leads to ambiguity. | ||
#TEST DD 05 -42°, 102° | ||
#TEST DD 05 N 42°, W 102° | ||
#TEST DD 05 -42°, +102° | ||
#TEST DD 05 -42°, 102° | ||
#TEST DD 05 -42°, 102° | ||
#TEST DD 05 +42°, -102° | ||
|
||
#RULE DD 06 \b<degLat><degSym>?\s?<hemiLat><latlonSep3><degLon><degSym>?\s?<hemiLon>\b | ||
#TEST DD 06 42°N, 102°N FAIL | ||
#TEST DD 06 42° N, 102° W | ||
#TEST DD 06 42 N, 102 W | ||
#TEST DD 06 42N x 102W | ||
#TEST DD 06 00 N 130 WA FAIL | ||
|
||
#RULE DD 07 \b<hemiLat>\s?<degLat><latlonSep3><hemiLon>\s?<degLon>\b | ||
#TEST DD 07 N42, W102 | ||
#TEST DD 07 N42 x W102 | ||
#TEST DD 07 N 42 W 102 | ||
|
||
//#RULE DD 08 \b<hemiLatSign>?<decDegLat>,\s?<hemiLonSign>?<decDegLon> | ||
//#TEST DD 08 text 54.67, -117 text # FAIL - no decimal longitude | ||
//#TEST DD 08 text 54.33, -117.81 text | ||
//#TEST DD 08 text +54.22, 117.71 text | ||
//#TEST DD 08 text +54.22, 045.71 text | ||
//#TEST DD 08 text +54.22, -045.71 text | ||
//#TEST DD 08 text 54.11,117.61 text | ||
//#TEST DD 08 YX = (54.11, 117.61) text # Coordinate tuple () | ||
|