Skip to content

Commit

Permalink
Release of python v1.6.2
Browse files Browse the repository at this point in the history
mubaldino committed Jun 11, 2024
1 parent 5991d29 commit 2a77424
Showing 3 changed files with 897 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/main/python/MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
include LICENSE
# NOTE - all items here are copied from Java src/main/resources (ant script)
# except -- *_py.cfg for xtemp and xcoord extractors
include opensextant/resources/geocoord_patterns_py.cfg
include opensextant/resources/country-names-2021.csv
include opensextant/resources/country-names-2015.csv
267 changes: 267 additions & 0 deletions src/main/python/opensextant/resources/datetime_patterns_py.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
/*
* NOTICE
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* **************************************************************************
* NOTICE
* This software was produced for the U. S. Government under Contract No.
* W15P7T-12-C-F600, and is subject to the Rights in Noncommercial Computer
* Software and Noncommercial Computer Software Documentation Clause
* 252.227-7014 (JUN 1995)
*
* (c) 2009-2013 The MITRE Corporation. All Rights Reserved.
* **************************************************************************
*/

// ALL Patterns below - defines, rules, etc. -- are for MATCHING.
// Parsing of actual fields named in defines is done after matches are found.
// Validation of parsed fields is last.

# Well-known month abbreviations.
#DEFINE MON_ABBREV JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEPT?|OCT|NOV|DEC

# A name starting with 3 ASCII letters as above, but followed by other letters, possibly not English or ASCII.
# Detection of month/day/year patterns with non-English month names is only a coincidence if they share a common prefix.
# Locales for date patterns and language options could be explored further. But is beyond scope.
#DEFINE MON_NAME JAN\w{0,6}|FEB\w{0,6}|MAR\w{0,2}|APR\w{0,3}|MAY|JUN\w{0,2}|JUL\w{0,2}|AUG\w{0,3}|SEP\w{0,6}|OCT\w{0,6}|NOV\w{0,6}|DEC\w{0,6}
#DEFINE DAY_ENUM th|nd|rd|st

# Fixed length fields
// In all practicality, year is 1xxx or 2xxx. Years 0001 to 0999 not really considered.
#DEFINE YEAR [12]\d{3}
#DEFINE YY '?\d\d

// Year/YY is 2-4 digits,... but could be 3. This is only used for matching. XTemp still validates matches.
// '76
//
#DEFINE YEARYY '?\d{2}|\d{4}
#DEFINE MM [01]\d
#DEFINE DD [0-3]\d
#DEFINE SHORT_TZ [A-Z]

#DEFINE hh [0-2]\d
#DEFINE mm [0-5]\d
#DEFINE ss [0-5]\d


# Variable length
#DEFINE DOM [0-3]?\d
#DEFINE MONTH [01]?\d
#DEFINE LONG_TZ [A-Z]{3,5}
# Variable length Day/Month digits:
#DEFINE DM1 [0-3]?\d
#DEFINE DM2 [0-3]?\d

#DEFINE OF of|Of|OF
# Do not use DSEP? (that is an optional separator.) Most field-separated patterns would be too noisy.
#DEFINE DSEP1 [-/.]
#DEFINE DSEP2 [-/.]

// ........................................
// Month, Day, Year patterns, MDY
// ........................................
// FORM: DATE: MM/DD/YY
#CLASS MDY opensextant.extractors.xtemporal.DateTimeMatch
#RULE MDY 01 \b<DM1>/<DM2>/<YY>\b
#TEST MDY 01 12/30/90
#TEST MDY 01 DATE: 12/30/90
#TEST MDY 01 DATE: 30/07/90 Jul 30, European locale
#TEST MDY 01 30/12/15 Dec 30, European locale
#TEST MDY 01 30/14/15 FAIL 14 is invalid
#TEST MDY 01 13/30/90 FAIL bad MON
#TEST MDY 01 12/32/90 FAIL bad DOM
#TEST MDY 01 12/30/01
#TEST MDY 01 12/30/00
#TEST MDY 01 12/30/55
#TEST MDY 01 1/30/55
#TEST MDY 01 12/30/15
#TEST MDY 01 12/1/15
#TEST MDY 01 12/01/15
#TEST MDY 01 15/01/15 Jan 15, 2015


// FORM: DATE: MM/DD/YYYY
#RULE MDY 02 \b<DM1>/<DM2>/<YEAR>\b
#TEST MDY 02 12/30/1990
#TEST MDY 02 DATE: 12/30/1990
#TEST MDY 02 13/30/1990 FAIL bad MON
#TEST MDY 02 12/32/1990 FAIL bad DOM
#TEST MDY 02 12/30/2001
#TEST MDY 02 12/30/0000 FAIL bad YYYY
#TEST MDY 02 12/30/1955
#TEST MDY 02 12/30/1915

// FORM: MMM DD, YYYY or MMM DD YYYY, MMM DD, YY, etc.
#RULE MDY 03 \b<MON_NAME>[\s.]+<DOM>[\s,.]+<YEARYY>\b
#TEST MDY 03 DEC 30, 1990
#TEST MDY 03 DEC 30 1990
#TEST MDY 03 DEC.30 1990
#TEST MDY 03 DEC. 30 1990
#TEST MDY 03 DEC.30.1990
#TEST MDY 03 DEC 30 90
#TEST MDY 03 DEC 30 990 FAIL bad year
#TEST MDY 03 DEC 00 1990 FAIL bad DOM
#TEST MDY 03 DEC 01 2300 FAIL ambiguous; time appears to be 2300 where year is expected
#TEST MDY 03 DECEMBER 30 1990
#TEST MDY 03 DECMEBER 30 90
#TEST MDY 03 DECIEMBRE 30 1990
#TEST MDY 03 DECIEMBRE 00 1990 # FAIL no 00 day
#TEST MDY 03 DECEMBER 01 2300 FAIL ambiguous; time appears to be 2300 where year is expected


// FORM: MMM, YYYY or Month, YYYY comma optional. 4-digit year required
#RULE MDY 04 \b<MON_NAME>[\s,.]+<YEAR>\b
#TEST MDY 04 DEC 1990
#TEST MDY 04 DEC, 1990
#TEST MDY 04 DEC. 1990
#TEST MDY 04 DECEMBER, 1990
#TEST MDY 04 DECIEMBRE, 1990
#TEST MDY 04 DÉCEMBRE, 1990
#TEST MDY 04 DÉC, 1990

// FORM: MMM of YYYY
#RULE MDY 04a \b<MON_NAME>\s+<OF>\s+<YEAR>\b
#TEST MDY 04a DEC of 1990
#TEST MDY 04a DECEMBER of 1990

#RULE MDY 05 \b<DOM>\s+<MON_NAME>[\s,]+<YEARYY>\b
#TEST MDY 05 30 DEC 1990
#TEST MDY 05 30 DEC 90
#TEST MDY 05 01 DEC 00
#TEST MDY 05 01 DEC 02
#TEST MDY 05 30 DECEMBER 1990
#TEST MDY 05 30 DECMEBER 1990
#TEST MDY 05 30 DECIEMBRE 1990

#RULE MDY 06a \b<MON_NAME>[\s.]+<DOM><DAY_ENUM>[\s,]+<YEAR>\b
#TEST MDY 06a September 19th, 2017
#TEST MDY 06a September 19th, 17 # FAIL
#TEST MDY 06a September 19 th, 17 # FAIL
#TEST MDY 06a Sept. 19th, 2017
#TEST MDY 06a Sept 19th, 2017
#TEST MDY 06a Sept 1st, 2017
#TEST MDY 06a Sept 23rd, 2017
#TEST MDY 06a Sept 15th, 2017
#TEST MDY 06a Sept 22nd, 2017

#RULE MDY 06b \b<DOM><DAY_ENUM>\s+<OF>?\s*<MON_NAME>[\s,]+<YEAR>\b
#TEST MDY 06b 19th September, 2017
#TEST MDY 06b 19th of September, 2017

#CLASS DMYT opensextant.extractors.xtemporal.DateTimeMatch
#RULE DMYT 01 \b<DOM>\s+<MON_NAME>[\s,]+<YEARYY> <hh>:?<mm>\b
#TEST DMYT 01 30 DEC 1990 0400
#TEST DMYT 01 30 DEC 90 0400
#TEST DMYT 01 11 JUN 14 1815 06:15 PM, 11 JUNE 2014
#TEST DMYT 01 25 March 2012 04:00
#TEST DMYT 01 25 March, 2012 04:00



// FORM: DATE: DD-MON-YYYY
#CLASS DMY opensextant.extractors.xtemporal.DateTimeMatch
#RULE DMY 01 \b<DOM>-<MON_NAME>-<YEARYY>\b
#TEST DMY 01 12-DEC-90
#TEST DMY 01 12-DEC-1990

// FORM: DATE: DD MON YYYY
#RULE DMY 02 \b<DOM>\s*<MON_NAME>\s*<YEARYY>\b
#TEST DMY 02 12 DEC 90
#TEST DMY 02 12 DEC 1990
#TEST DMY 02 12DEC90
#TEST DMY 02 12DEC1990
#TEST DMY 02 12MARCH1999
#TEST DMY 02 12FEBBRAIO1999
#TEST DMY 02 12JUL1999
#TEST DMY 02 12JULIO1999

// FORM: DATE: YYYY-MM-DD as it appears in free text of documents.
// The limitations of this pattern are related to how it was used.
// This is a relatively modern format; was this format used in text in 1700s?
#CLASS YMD opensextant.extractors.xtemporal.DateTimeMatch
#RULE YMD 01 \b<YEAR><DSEP1><MM><DSEP2><DOM>\b
#TEST YMD 01 2001-11-11
#TEST YMD 01 0001-04-34 # FAIL
#TEST YMD 01 1001-04-30 # FAIL
#TEST YMD 01 2001-04-30
#TEST YMD 01 1990-04-30
#TEST YMD 01 1790-04-30 # FAIL -- 1800 01 01 is earliest date for this pattern.
#TEST YMD 01 a2001-04-30 # FAIL
#TEST YMD 01 c2001-04-30 # FAIL
#TEST YMD 01 42001-04-30 # FAIL


// ........................................
// DATE TIME PATTERNS, DTM
// ........................................
#CLASS DTM opensextant.extractors.xtemporal.DateTimeMatch

// FORM: A|O|P|R DDHHMMZ MMM YY
#RULE DTM 01 \b<DD><hh><mm><SHORT_TZ>\s*<MON_ABBREV>\s*<YY>\b
#TEST DTM 01 A 301400Z DEC 90
#TEST DTM 01 R 301400Z DEC 90
#TEST DTM 01 A 351400Z DEC 90 # FAIL day out of range

// FORM: YYYYMMDDTHHMMZ
#RULE DTM 02 \b<YEAR><MM><DD>T<hh><mm><SHORT_TZ>\b
#TEST DTM 02 20101230T1400Z

// FORM: YYYYMMDDTHHMM ZZZ
#RULE DTM 02a \b<YEAR><MM><DD>T<hh><mm> <LONG_TZ>\b
#TEST DTM 02a 20101230T1400 EST # UTC-0500. parses to 1900 UTC
#TEST DTM 02a 20101230T1400 BNT # UTC+0800. parses to 0600 UTC
#TEST DTM 02a 20101230T1400 XXX # FAIL -- invalid TZ
#TEST DTM 02a 20101230T1400 PST # UTC-0800. parses to UTC

// FORM: YYYY-MM-DDTHH:MM:SS ... ISO Time. ISO 8601 uses "-", not "/". But should be validated in normalization.
// DTM 04 is collapsed into this pattern.
#RULE DTM 03 \b<YEAR><DSEP1><MM><DSEP2><DD>[T ]<hh>:<mm>:<ss>
#TEST DTM 03 2010-12-30T14:00:01:12
#TEST DTM 03 2010-12-30T14:00:02

// FORM: YYYY-MM-DDTHH:MM ... ISO Time. See 03 above.
#RULE DTM 03b \b<YEAR><DSEP1><MM><DSEP2><DD>[T ]<hh>:<mm>
#TEST DTM 03b 2010-12-30T14:01:11:12
#TEST DTM 03b 2010-12-30T14:02:12
#TEST DTM 03b 2010-12/30T14:03:13 # FAIL

// FORM: MM/DD/YY* HH:MM:SS.
// TODO: 12-hour clock time and detect PM/AM. This HH:MM is only 24 hour clock.
#RULE DTM 05a \b<MONTH><DSEP1><DOM><DSEP2><YEARYY>\s?<hh>:<mm>:<ss>
#TEST DTM 05a 12-30-20 14:00:00:12 # extra data associated with value?
#TEST DTM 05a 12-30-2020 14:00:01
#TEST DTM 05a 12/30-2020 14:00:02 # FAIL Test mixed punctuation.
#TEST DTM 05a 12/30/2020 14:00:02 # Test mixed punctuation.
#TEST DTM 05a 12/30/20 14:00 # Test mixed punctuation.

// FORM: MM/DD/YY* HH:MM.
// TODO: 12-hour clock time and detect PM/AM. This HH:MM is only 24 hour clock.
#RULE DTM 05b \b<MONTH><DSEP1><DOM><DSEP2><YEARYY>\s?<hh>:<mm>
#TEST DTM 05b 12-30-20 14:00:00:12 # extra data associated with value?
#TEST DTM 05b 12-30-2020 14:01:01
#TEST DTM 05b 12/30-2020 14:02:02 # FAIL Test mixed punctuation.
#TEST DTM 05b 12/30/2020 14:03:02 # Test mixed punctuation.
#TEST DTM 05b 12/30/20 14:04 # Test mixed punctuation.
#TEST DTM 05b 12.30.20 14:04 # European convention for Date.
#TEST DTM 05b 2.30.20 14:04 # FAIL No 30 FEB. European convention for Date. Non-zero padded
#TEST DTM 05b 2.30/2020 14:04 # FAIL No 30 FEB., Mixed Separators. European convention for Date. Non-zero padded.
#TEST DTM 05b 4.30.20 14:04 # European convention for Date. Non-zero padded
#TEST DTM 05b 4.30/2020 14:04 # FAIL Mixed Separators. European convention for Date. Non-zero padded.


// 8-digit date is great when used in short spans of text; But as a general pattern, this matches any 8 digit number.
// Not very accurate on data or large texts.
// FORM: YYYYMMDD
//#RULE DTM xx \b<YEAR><MM><DD>\b
//#TEST DTM xx 20101230

628 changes: 628 additions & 0 deletions src/main/python/opensextant/resources/geocoord_patterns_py.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,628 @@
/**
* NOTICE
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* **************************************************************************
* NOTICE
* This software was produced for the U. S. Government under Contract No.
* W15P7T-12-C-F600, and is subject to the Rights in Noncommercial Computer
* Software and Noncommercial Computer Software Documentation Clause
* 252.227-7014 (JUN 1995)
*
* (c) 2009-2013 The MITRE Corporation. All Rights Reserved.
* **************************************************************************
*/
// HISTORY
// =================
// 2012-07-19 MCU added range test case for DD-01, DM-01, DMS-01
// "98 degrees" test: "98 44 009 N, 292 24 03 E FAIL", for example.
// 2012-08-08 MCU removed notion that LONGITUDE is [01][0-7]\d I don't rembember where I got that.
// [01]?\d\d is sufficient and validation of match will be done on all matches. So these patterns can be simplified.
// 0\d\d
// \d{1,3}, etc.
// 2012-10-10 MCU Looking at DMS patterns abmiguities. Order matters. Flexpat 1.1 now preserves order of appearance
// in this file. First rule to fire a match will be considered best match, others with exact span will be duplicates.
//
// 2012-11-26 MCU DM and DMS patterns without hemisphere patterns are now supported.
// Truth data included as a CSV file. See OpenSextant GeocoderEval for source and scripts.
//
// 2013-01-05 MCU Saw reports that had invisible 'guarded area marker char' (U+0096) as separator space between lat/lon
// -- So latlonSep has range of allowable space or control chars U+0080 to U+00A0.
//
// 2019-12-17 MCU Added UTM reference and additional test cases. More needed, to cover range of Lat-band widths
//
// 2022-04-12 MCU Look for mismatched punctuation, again. "DEG: DEG M.s DEG M.s"
// Pattern guidelines: use "\" not "\\"
// To really mean "\\" -- escape a slash use <slash>
//
//Hemispheres
#DEFINE hemiLat [NSns]
#DEFINE hemiLon [EWew]
// "-" as first char is not a range char.
#DEFINE hemiLatSign [-+]
#DEFINE hemiLonSign [-+]
#DEFINE hemiLatPre [-+]|[NSns]\s?
#DEFINE hemiLonPre [-+]|[EWew]\s?
//space and punctuation
#DEFINE slash \\
#DEFINE degSym [˚°º]
// LAT DM separator -- removed ".", as D.M is a decimal degree pattern.
// To specifically support D.M.S patterns best use a specific pattern for that one case.
// LAT SEPARATORS
#DEFINE dmLatSep \s?DEG|[-˚°º:]|\s
#DEFINE msLatSep [-'´’′:.\s]
//#DEFINE secLatSep ["'\u00B4\u2019\u201D\u2033\u2032\s]{1,2}

// LON SEPARATORS -- same as LAT
#DEFINE dmLonSep \s?DEG|[-˚°º:]|\s
#DEFINE msLonSep [-'´’′:.\s]
//#DEFINE secLonSep ["'\u00B4\u2019\u201D\u2033\u2032\s]{1,2}

#DEFINE secLatSep ["'´’”″′\s]{0,2}
#DEFINE secLonSep ["'´’”″′\s]{0,2}

// #DEFINE latlonSepNoDash [;,/|=x\s\u0080-\u00A0]
// Allows a "-" dash between LAT and LON, where dash is not a hemi sign
// The pattern where a "Dash(-)" is used to separate a Lat/Lon will be its own pattern.
// For example 0156N-4555W uses a dash, but has hemisphere symbol as ordinal.
// +0156 - -4555 is problematic to detect
#DEFINE latlonSep [;,/|=x\s\u0080\u0096\u00A0]?
#DEFINE latlonSep3 [;,/|=x\s\u0080\u0096\u00A0]{1,3}
#DEFINE latlonSep03 [;,/|=x\s\u0080\u0096\u00A0]{0,3}
#DEFINE dmsSep [-.:\s]

// SPACE: Coordinate patterns can break across lines ~ HTML, word docs, word wrapping, etc.
// RULE: Allow no more than 3 whitespace chars in the general case; In some cases optional space may be worth testing
//#DEFINE space \\s{1,3}
//#DEFINE optSpace \\s{0,3}

// Parts of a decimal degree Latitude/Longitude
#DEFINE decDegLat \d?\d\.\d{1,20}
#DEFINE decDegLon [0-1]?\d?\d\.\d{1,20}

// degree decimal minute lat/lon; either:
// MM.m...,
// M.m...,
// M-mm is MIN-SEC, not MIN.DECMIN
// MM-mmm...
// M-mmm...
// NOT M-mm or MM-mm which implies Min/Sec, not decimal
#DEFINE decMinLat [0-5]?\d\.\d{1,20}
#DEFINE decMinLat3 [0-5]?\d[-.]\d{3,20}
#DEFINE decMinLon [0-5]?\d\.\d{1,20}
#DEFINE decMinLon3 [0-5]?\d[-.]\d{3,20}

// Difference between DMS fixed-width and variable lenght fields could be accomplished by these greedy matchers
// \d{1,2} or \d?, for example.
// if 2 digits exist, then \d{1,2} will consume both, before matching next item in pattern.
// Fear: too many variable or optional items will lead to a lot of false positives.
// Later if this framework proves to work we could consolidate the variable length and fixed length patterns back to a common pattern.
// For now trying to make this more accurate and explicit.

// Parts of a DMS latitude/Longitude
#DEFINE degLat \d{1,2}
#DEFINE degLon [01]?\d?\d

//Parts of a Degrees Minutes Seconds fractional seconds
#DEFINE fractMinLat [-\.]\d{1,6}
#DEFINE fractMinLon [-\.]\d{1,6}
#DEFINE fractSecLat \.\d{1,6}
#DEFINE fractSecLon \.\d{1,6}
#DEFINE fractSecLatOpt \.?\d{1,6}
#DEFINE fractSecLonOpt \.?\d{1,6}
#DEFINE fractMinLat3 \d{3,6}
#DEFINE fractMinLon3 \d{3,6}

// Fixed-length patterns
#DEFINE dmsDegLat [0-8]\d
#DEFINE dmsDegLon [01]\d\d
#DEFINE dmsMinLat [0-5]\d
#DEFINE dmsMinLon [0-5]\d
#DEFINE dmsSecLat [0-5]\d
#DEFINE dmsSecLon [0-5]\d

#DEFINE minLat [0-5]?\d
#DEFINE secLat [0-5]?\d
#DEFINE minLon [0-5]?\d
#DEFINE secLon [0-5]?\d

#DEFINE latMarker Lat\w*[:=]?
#DEFINE lonMarker Lon\w*[:=]?

// Parts of an MGRS and UTM
#DEFINE UTMBand [A-HJ-NP-Z]
#DEFINE UTMZone [0-5]?\d
#DEFINE MGRSQuad [A-HJ-NP-Z][A-HJ-NP-V]
#DEFINE MGRSZone [0-6]?\d\s?[C-HJ-NP-X]

// http://en.wikipedia.org/wiki/Universal_Transverse_Mercator
// UTM Easting is from 167,000 to 833,000 m
// Northing is from 0 to 10,000,000 m ( <= 9,999,999m)
#DEFINE UTMEasting \d{6}
#DEFINE UTMNorthing \d{3,7}

// MGRS precision is 1m. Quad is 100,000m sq so resolution is 5 digits + 5 digits with optional whitespace
// 99999n 99999e -- in MGRS we never see "m" units or N/E denoted explicitly
// Occassionally, newlines or whitespace are interspersed in offset
// minimal:
// dd
// ddddd ddddd with an additional one or two white spaces. The offsets start and end with numbers. Only whitespace between is optional.
// ddddd dddddd additional digit in Easting -- trailing 6th digit is a typo; trim off
// dddddd ddddd additional digit in Northing -- trailing 6th digit is a typo; trim off
// ddddddddddd Typo introduces ambiguity -- only correct thing is to split on halfway point +/- 1 digit and emit two answers
//
// "precise" pattern is too precise.
#DEFINE Easting_Northing_Precise \d{2,5}\s?\d{2,5}
#DEFINE Easting_Northing \d[\s\d]{0,10}\d


//---------------------------RULES------------------------------------------------

# #RULE FAM ENUM PATTERN | PATTERN-WITH-GROUPS
# #TEST FAM ENUM TEXT-WITH-COORD
# #NEGATIVE FAM ENUM TEXT-WITH-INVALID-COORD --- UNUSED SO FAR.

//..........................................
// MGRS
//..........................................
// FORM: ZZZQQ ee* nn*
// Z Q E N Zone (2-3 char), Quad (2 char), Easting (1-5 digits) Northing(1-5 digits)
// http://en.wikipedia.org/wiki/Military_grid_reference_system
// http://www.armystudyguide.com/content/Prep_For_Basic_Training/Prep_for_basic_land_navigation/locate-a-point-using-the-.shtml
#CLASS MGRS opensextant.extractors.xcoord.MGRSMatch
#RULE MGRS 01 \b<MGRSZone>\s?<MGRSQuad>\s{0,2}<Easting_Northing>\b
#TEST MGRS 01 4QFJ 50007000
#TEST MGRS 01 38SMB4611036560
#TEST MGRS 01 38SMB46113656
#TEST MGRS 01 8SMB46113656
#TEST MGRS 01 38SMB 4611036560
#TEST MGRS 01 38SMB 46113656
#TEST MGRS 01 38SMB 461136560 # asymmetric
#TEST MGRS 01 38SMB 46110 3656
#TEST MGRS 01 38SMB 4611 03656
#TEST MGRS 01 38SMB 4611 03656 60
#TEST MGRS 01 38SMB 4611 03656 4 km ## random numeric text following a northing.
#TEST MGRS 01 38SMB 461 03656 4 km ## random numeric text following a northing.
#TEST MGRS 01 38SMB 0461 0365
#TEST MGRS 01 38SMB 00461 00365
#TEST MGRS 01 38SMB 04610 03650
#TEST MGRS 01 38SMB 04610 0365
#TEST MGRS 01 38SMB 46110365 60
#TEST MGRS 01 38SMB 46110365 60
#TEST MGRS 01 38SMB 46110365 60000 # supposedly a large number follows -- pattern should not match.
#TEST MGRS 01 38 SMB 4611 3656
#TEST MGRS 01 42 RPR 4611 3656 FAIL 42 RP* P not valid lat band in 42 longitudinal zone
#TEST MGRS 01 42WDB 4611 3656 FAIL, D not a valid zone
#TEST MGRS 01 63SNV 6567 7888 FAIL
#TEST MGRS 01 00SNV 6567 7888 FAIL
#TEST MGRS 01 0SNV 6567 7888 FAIL
#TEST MGRS 01 37SCA211914648 FAIL Mismatched Northing/Easting. Disable XCoord MGRS_STRICT to rescue coordinate.
# MGRS date patterns -- sometimes dates are valid MGRS other times they match the pattern, but are invalid MGRS locations.
#TEST MGRS 01 20 PER 1000 FAIL - appears to be a rate of some sort.
#TEST MGRS 01 10 JAN 1994 FAIL - appears to be a date of some sort.
#TEST MGRS 01 10 JAN 94 FAIL - appears to be a date of some sort.
#TEST MGRS 01 1 JAN 94 FAIL - appears to be a date of some sort.
#TEST MGRS 01 01 JAN 94 FAIL - appears to be a date of some sort. past century
#TEST MGRS 01 10 JAN 13 FAIL - appears to be a date of some sort.
#TEST MGRS 01 1 JAN 13 FAIL - appears to be a date of some sort.
#TEST MGRS 01 4 JUL 2008 FAIL - appears to be a date of some sort; and also invalid MGRS
#TEST MGRS 01 01 JAN 13 FAIL - appears to be a date of some sort. current year, 2013
#TEST MGRS 01 14 JAN 00 FAIL - appears to be a date of some sort. year 2000
#TEST MGRS 01 14 JAN 30 FAIL - appears to be a date of some sort. year 1930?
#TEST MGRS 01 14 EST 2030 FAIL - appears to be a date of some sort. year 2030?
#TEST MGRS 01 14 EST 2008 FAIL - appears to be a date of some sort
#TEST MGRS 01 14 EDT 2008 FAIL - appears to be a date of some sort
#TEST MGRS 01 61 EDT 2008 FAIL - appears to be a date of some sort
#TEST MGRS 01 7JAN13 1500 FAIL - obvious date, Jan. 7th, 2013, 3PM
#TEST MGRS 01 17MAR921100 TEST is a date, but should pass if no MGRS filters are on.
#TEST MGRS 01 38smb 00461 00365 FAIL - lower case MGRS grids should not match by default; it is so rare.

//..........................................
// UTM
//..........................................
// REFERENCE: https://en.wikipedia.org/wiki/Universal_Transverse_Mercator_coordinate_system
// FORM: (?# ZZA DDDDDD DDDDDDD )
#CLASS UTM opensextant.extractors.xcoord.UTMMatch
#RULE UTM 01 \b<UTMZone>\s?<UTMBand>\s?<UTMEasting>[mE]{0,2}\s?<UTMNorthing>[mN]{0,2}\b
#TEST UTM 01 17N 699990 3333335
#TEST UTM 01 17S 699990 3333335
#TEST UTM 01 17N 630084 4833438 # Should be CN tower, as mentioned in wikipedia above.
#TEST UTM 01 17T 699990 3333335
#TEST UTM 01 7S 699990 3333335
#TEST UTM 01 17N6999903333335
#TEST UTM 01 17 6999903333335 # FAIL ambiguous
#TEST UTM 01 17N 699990m 3333335m
#TEST UTM 01 17S 699990mE 3333335mN
#TEST UTM 01 17N 699990Em 3333335Nm
#TEST UTM 01 17X 699990e 3333335n Pass, but questionable lat band may contradict Northing; Northern Hemi, X lat
#TEST UTM 01 17D 699990E 3333335N Ditto, Southern Hemi = D lat


//..........................................
// Degrees Decimal minutes
//..........................................
// TODO: Clean up all test cases below for DM and DMS
//
// 00 DD MMH-mmm DDD MMH-mmm
// 01a DD MM H DDD MM H
// 01a DD MM.mmm H DDD MM.mmm H
// 01a DD DEG MM.m H DDD DEG MM.m H
// 01b H DD MM H DDD MM
// 01b H DD MM.mmm H DDD MM.mmm
//
// Variables: "x" or "BY" are equivalent separators to ";/,|(sp)"
// Deg symbol, ', ", or '' are all optional, but if present, they suggest pattern is a coordinate, not just a pair of numbers.
// Lon DDD is 1-3 numbers, Lat DD is 1-3 numbers, leading '0' optional. '

// Pattern logic for dealing with factional minutes;
// work-in progress:
// has separators?
// YES => is sepator a "-" dash throughout?
// YES => if fractMin length is 2 chars, then this is DMS, not DM.m
// NO => is factMin prefixed with "-" and
//
//

// FORM: DDMMH-mmm
#CLASS DM opensextant.extractors.xcoord.DegMinMatch

#RULE DM 00 \b<dmsDegLat>[-\s]?<dmsMinLat><hemiLat><fractMinLat><latlonSep><dmsDegLon>[-\s]?<dmsMinLon><hemiLon><fractMinLon>
#TEST DM 00 4218N-009 10224W-003
#TEST DM 00 4218N.009 10224W.003
#TEST DM 00 42-18N-009 102-24W-003
#TEST DM 00 42 18N-009 102 24W-003
#TEST DM 00 42 18N-009102 24W-003 FAIL same pattern test as above, but no separator.
#TEST DM 00 78 44N-009, 092 24N-003 FAIL, both hemispheres are N
#TEST DM 00 98 44N-009, 292 24W-003 FAIL, XY out of range

// VARIABLE LENGTH FIELDS
// FORM: DDMMmmmHDDDMMmmmH
#RULE DM 01a \b<degLat><dmLatSep>\s?<minLat><fractMinLat>?<msLatSep>?\s?<hemiLat><latlonSep3><degLon><dmLonSep>\s?<minLon><fractMinLon>?<msLonSep>?\s?<hemiLon>
#TEST DM 01a 42 18.009N 102 24.003W
#TEST DM 01a 42 18.00 N 102 24.00W
#TEST DM 01a 42 18-00 N 102 24-00W
#TEST DM 01a 42-18-09N; 102-24-03W # FAIL Ambiguous, but good test. This is valid DMS only.
#TEST DM 01a 42 9-00 N 102 6-00W This pattern needs to be more explicit -- its own pattern?--- MM-mm used to represent MM.mm fractional minutes is tough, as it is just as easily MM-SS min/seconds; However MM-mmm is more obvious as fractional minutes.
#TEST DM 01a 42 9.009 N 102 6.003W
#TEST DM 01a 42 18-009N 102 24-003W
#TEST DM 01a 42.18.009N 102.24.003W
#TEST DM 01a 42-18-009N; 102-24-003W
#TEST DM 01a 42° 18.009'N; 102° 24.003'W
#TEST DM 01a 42° 18.009N; 102° 24.003W
#TEST DM 01a 42°18.009N; 102°24.003W
#TEST DM 01a 12° 18.009N; 2° 24.003W
#TEST DM 01a 42-18-09N; 102-24-03W # FAIL: DMS?, MM-SS is not decimal Min MM.SS it is Min Sec
#TEST DM 01a 42-18-009N; 102-24-003W # Clearly Decimal, as third slot is 3 digits, so it seems 18.009 Min, rather than 18min 009sec
#TEST DM 01a 42.18.009N 102.24.003W
#TEST DM 01a 3: 42.18N 102.24W # FAIL, avoid the 3: and let DD pattern pick this up.
#TEST DM 01a 42.18.009N x 102.24.003W
#TEST DM 01a 42.18.9999 N 102.24.5555 W
#TEST DM 01a 42°18'N 102°24'W
#TEST DM 01a 42° 18'N 102° 24'W
#TEST DM 01a 42 DEG 18.0N 102 DEG 24.0W
#TEST DM 01a 01DEG 44 N 101DEG 44 E
#TEST DM 01a 42:18n 102:24w
#TEST DM 01a 42:18n - 102:24w
#TEST DM 01a 42:00N-102:18E


// VARIABLE LENGTH FIELDS
// FORM: DD MM.mmm H
// lat/lon separator -- cannot use "-" (latlonSep) as it is ambiguous if optional sep with optional preceding hemisphere which one is which.
//
#RULE DM 01b \b<hemiLatPre>\s?<degLat><dmLatSep>\s?<minLat><fractMinLat>?<msLatSep>?<latlonSep3><hemiLonPre>\s?<degLon><dmLonSep>\s?<minLon><fractMinLon>?<msLonSep>?
#TEST DM 01b N42 18.00 W102 24.00
#TEST DM 01b N 42 18.00 W 102 24.00
#TEST DM 01b N42 18-00 W102 24-00
#TEST DM 01b N42 18-00 x W102 24-00
#TEST DM 01b N 4218.0 W 10224.0
#TEST DM 01b N42.18.005 x W102.24.008
#TEST DM 01b N42 18-005 x W102 24-008
#TEST DM 01b +42° 18 -102° 24
#TEST DM 01b N42 18' W102 24'
#TEST DM 01b +42 18 -102 24 # FAIL
#TEST DM 01b N42.18.005 x W102.24.008
#TEST DM 01b N42:18 W102:24
#TEST DM 01b n42:18 w102:24
#TEST DM 01b N 01° 44' E 101° 22'
#TEST DM 01b N 01° 44' - E 101° 22'
#TEST DM 01b N 01° 44' - 101° 22' Could possibly not PASS, as lat hemisphere is North; ambiguous if lon hemi is West or if "-" dash
#TEST DM 01b 01° 44' - -101° 22' A really obscure use of dash, next to a hemisphere sign. TESTING Only
#TEST DM 01b -1 12-00-32 FAIL: lopsided fields and/or precision. Use of "-" separator is inconsistent.
#TEST DM 01b -1-45 -12-00-32 FAIL


// COPY OF 01a/b to allow for fixed-width min, sec, but w/out delimiters.
// FORM: DDMM.m*H
#RULE DM 02a \b<dmsDegLat><minLat><fractMinLat>\s?<hemiLat><latlonSep03><dmsDegLon><minLon><fractMinLon>\s?<hemiLon>\b
#TEST DM 02a 4218.009N 10224.003W
#TEST DM 02a 4209.009N 10206.003W
#TEST DM 02a 4209.00N 10206.00W
#TEST DM 02a 4209.0N 10206.0W
#TEST DM 02a 4209.0N - 10206.0W #FAIL dash not allowed as separator currently.
#TEST DM 02a 4209.0N x 10206.0W

// FORM: HDDMM.m*
#RULE DM 02b \b<hemiLat>\s?<dmsDegLat><dmsMinLat><fractMinLat><latlonSep03><hemiLon>\s?<dmsDegLon><dmsMinLon><fractMinLon>\b
#TEST DM 02b N4218.0 W10224.0
#TEST DM 02b N4218.009W10224.003
#TEST DM 02b N4218-0018 W10224-0444

// Desc: FIXED, NO DELIM, FRACTIONAL MINUTES -- 03a/b
#RULE DM 03a \b<dmsDegLat><dmsMinLat><fractMinLat3>\s?<hemiLat><latlonSep><dmsDegLon><dmsMinLon><fractMinLon3>\s?<hemiLon>\b
#TEST DM 03a 4218009N10224003W
#TEST DM 03a 4218009N;10224003W
#TEST DM 03a 4218009N 10224003W

#RULE DM 03b \b<hemiLat>\s?<dmsDegLat><dmsMinLat><fractMinLat3><latlonSep><hemiLon>\s?<dmsDegLon><dmsMinLon><fractMinLon3>\b
#TEST DM 03b N4218009W10224003
#TEST DM 03b N4218009;W10224003
#TEST DM 03b N4218009 W10224003


// Finds DM or DM.m degree patterns that have no hemisphere.
// TODO: Revisit why optional hemisphere poses a problem. For now we assume the lat/lon separator is not a dash
//
#RULE DM 03-deg \b<hemiLatSign>?<degLat><degSym>\s*<minLat><fractMinLat>?<msLatSep>?<latlonSep3><hemiLonSign>?<degLon><degSym>\s*<minLon><fractMinLon>?<msLonSep>?\b
#TEST DM 03-deg 42° 18, 102° 24
#TEST DM 03-deg 42° 18' 102° 24'
#TEST DM 03-deg 42°18 x 102° 24
#TEST DM 03-deg 42˚ 18.44, 102˚ 24.11 # Vary deg symbol
#TEST DM 03-deg 42° 18.44' 102° 24.11'
#TEST DM 03-deg 42°18.44 x 102° 24.11
#TEST DM 03-deg (+42°18.44 x 102° 24.11)
#TEST DM 03-deg (+42°18.44 x +102° 24.11)
#TEST DM 03-deg (42°18.44 x -102° 24.11)


// desc: FIXED WIDTH FIELDS, NO SEP
#RULE DM 04a \b<hemiLat><dmsDegLat><dmsMinLat><latlonSep><hemiLon><dmsDegLon><dmsMinLon>
#TEST DM 04a N4218 W10224

// FORM: DDMMH DDMMH
#RULE DM 04b \b<dmsDegLat><dmsMinLat>\s*<hemiLat><latlonSep><dmsDegLon><dmsMinLon>\s*<hemiLon>
#TEST DM 04b 4218N 10224W

// RANDOM PATTERNS:
// FORM: /DDMMHd/DDDMMHd/ where d=checksum value
#RULE DM 05 /<dmsDegLat><dmsMinLat><hemiLat>\d/<dmsDegLon><dmsMinLon><hemiLon>\d/
#TEST DM 05 /4218N4/10224W5/

#DEFINE xySep \s?[Xx/,]\s?

// DM with minimal symbols. Required LAT/LON separator. Otherwise this is a list of numbers with +/- signs.
// FORM: +/-DD MM.mmm x +/-DDD DD.DDD
#RULE DM 06 <hemiLatSign><degLat>\s<decMinLat><xySep><hemiLonSign><degLon>\s<decMinLon>
#TEST DM 06 +42 18.0 x -102 24.0
#TEST DM 06 +42 18.0 X -102 24.0
#TEST DM 06 +42 18.0 / -102 24.0
#TEST DM 06 +42 18.0, -102 24.0

// DOT patterns -- We are for now assuming
//
// DD.MM.SS DDD.MM.SS is DMS
// DD.MM.mmm* DDD.MM.mmm* is DM
//
// and Longitude in these patterns is 3-digits
//
#RULE DM 01a-dot \b<dmsDegLat>\.<dmsMinLat>\.<fractMinLat3>\s?<hemiLat><latlonSep>*<dmsDegLon>\.<dmsMinLon>\.<fractMinLon3>\s?<hemiLon>
#TEST DM 01a-dot 03.44.777N 111.10.044W
#TEST DM 01a-dot 03.44.777 N 111.10.044 W
#TEST DM 01a-dot 03.44.7778 N 111.10.0442 W

#RULE DM 01b-dot \b<hemiLat>\s?<dmsDegLat>\.<dmsMinLat>\.<fractMinLat3><latlonSep>*<hemiLon>\s?<dmsDegLon>\.<dmsMinLon>\.<fractMinLon3>
#TEST DM 01b-dot N03.44.777 W111.10.044
#TEST DM 01a-dot N 03.44.777 W 111.10.044
#TEST DM 01a-dot N 03.44.7778 W 111.10.0442


#DEFINE trivialSep \s{0,2}

//..........................................
// Degrees Minutes Seconds
//..........................................
// 01a DD MM SS H DDD MM SS H
// 01b H DD MM SS H DDD MM SS
#CLASS DMS opensextant.extractors.xcoord.DegMinSecMatch
#RULE DMS 01a <latMarker>?\s*<degLat><dmLatSep>\s?<minLat><msLatSep>\s?<secLat><fractSecLat>?<secLatSep><trivialSep><hemiLat>\s?<latlonSep3><lonMarker>?\s*<degLon><dmLonSep>\s?<minLon><msLonSep>\s?<secLon><fractSecLon>?<secLonSep>\s?<hemiLon>
// Beware unicode
#TEST DMS 01a 42 18' 00N 102 24' 00W
#TEST DMS 01a 42 18' 00”N 102 24' 00”W
#TEST DMS 01a 2 8' 00"N 12 24' 00"W
#TEST DMS 01a 42° 18' 00"N 102° 24' 00"W
#TEST DMS 01a 42 18-00 N 102 24-00W
#TEST DMS 01a 42 9-00 N 102 6-00W
#TEST DMS 01a 34°22′24″N 49°14′27″E
#TEST DMS 01a 34°22 24″N 49°14 27″E
#TEST DMS 01a 34°22′24N 49°14′27E
#TEST DMS 01a 34° 22′ 24″N 49° 14′ 27″E
#TEST DMS 01a 34° 22' 24"N 49° 14' 27"E
#TEST DMS 01a 34°22'24"N 49°14'27"E
#TEST DMS 01a 34°22'24"N 49°14'27"E
#TEST DMS 01a 34 22'24"N 49 14'27"E
#TEST DMS 01a 34 22' 24"N 049 14' 27"E
#TEST DMS 01a 34 22 24N 049 14 27E
#TEST DMS 01a 34 22 24 N 049 14 27 E
#TEST DMS 01a 34 22 24N 049 14 27E
#TEST DMS 01a 34 22 24 N 49 14 27 E
#TEST DMS 01a 98 44 009 N, 292 24 03 E FAIL
#TEST DMS 01a 27:37:45N/82:42:10W // original
#TEST DMS 01a 27 37' 45"N/82 42' 10"W // original
#TEST DMS 01a "27° 37' 45’’N, 82° 42' 10’’W", // original
#TEST DMS 01a "27° 37' 45’N, 82° 42' 10’W", // single second hash sym
#TEST DMS 01a "27° 37' 45’’N 82° 42' 10’’W", // no lat/lon sep
#TEST DMS 01a "27° 37 45N, 82° 42 10W" // no min hash.
#TEST DMS 01a 01°44'55.5"N 101°22'33.0"E
#TEST DMS 01a 01° 44' 55.5"N 101° 22' 33.0"E
#TEST DMS 01a 01° 44' 55.5"N 101° 22' 33"E // asymmetric in decimal seconds precision
#TEST DMS 01a 42 18' 00" -102 24' 00"
#TEST DMS 01a GEO:(42° 18' 00" 102° 24' 00")
#TEST DMS 01a GEO:42° 18' 00" 102° 24' 00"
#TEST DMS 01a 03-04-05 12-11-10 Ambiguous date or coordinate -- very possibly neither.
#TEST DMS 01a 03-04-05 12:11:10 FAIL - is a date/time pattern
#TEST DMS 01a 99-04-05 12:11:10 FAIL - is a date/time pattern - part of 1999-04...
#TEST DMS 01a 03-04-58 12:11:10 FAIL - is a date/time pattern - DOB 1958-03-04
#TEST DMS 01a 03-04-99 12:11:10 FAIL - is a date/time pattern - DOB 1958-03-04
#TEST DMS 01a 14-04-33 12:11:10 FAIL - is a date/time pattern
#TEST DMS 01a Latitude: 41º58'46"N, Longitude: 87º54'20"W
#TEST DMS 01a location 28˚22′24″N 46˚14′27″E


// DMS 01 with hemisphere leading
#RULE DMS 01b \b<hemiLatPre><degLat><dmLatSep>\s?<minLat><msLatSep>\s?<secLat><fractSecLat>?<secLatSep><latlonSep3><hemiLonPre><degLon><dmLonSep>\s?<minLon><msLonSep>\s?<secLon><fractSecLon>?<secLonSep>\b
#TEST DMS 01b N01°44'55.5" E101°22'33.0"
#TEST DMS 01b N01° 44' 55.5" E101° 22' 33.0"
#TEST DMS 01b N 01° 44' 55.5" E 101° 22' 33.0"
#TEST DMS 01b N42 18' 00" W102 24' 00"
#TEST DMS 01b N02 8' 00" W012 24' 00"
#TEST DMS 01b N42° 18' 00" W102° 24' 00"
#TEST DMS 01b +42 18' 00" -102 24' 00"
#TEST DMS 01b +42: 18' 00" -102 24' 00" # FAIL hmm. a coordinate pattern, but a test of mismatch punct
#TEST DMS 01b 7: 12 22.33N 14 43.42W # PASS valid coordinate, but ignore the prefix 7:
#TEST DMS 01b xx +42° 18' 00" -102° 24' 00" xx

// DMS variation using ":" only.
// Only if hemisphere sign is present will this match.
#RULE DMS 01c \b<hemiLatSign><degLat>:<minLat>:<secLat>\s?<hemiLonSign><degLon>:<minLon>:<secLon>\b
#TEST DMS 01c x +42:18:00 -102:24:00 x

// DMS01 with fractional seconds, but no hemisphere symbol, just degree
#RULE DMS 01-deg \b<hemiLatSign>?<degLat><degSym>\s?<minLat><msLatSep>\s?<secLat><fractSecLat>?<secLatSep><latlonSep3><hemiLonSign>?<degLon><degSym>\s?<minLon><msLonSep>\s?<secLon><fractSecLon>?<secLonSep>\b
#TEST DMS 01-deg 01°44'55.5" 101°22'33.0"
#TEST DMS 01-deg 01° 44' 55.5" 101° 22' 33.0"
#TEST DMS 01-deg 01° 44' 55.5" 101° 22' 33"
#TEST DMS 01-deg 01° 44' 55" 101° 22' 33.0"
#TEST DMS 01-deg 01° 44' 55" -101° 22' 33.0"

#RULE DMS 01a-dot \b<dmsDegLat>\.<dmsMinLat>\.<dmsSecLat>\s?<hemiLat><latlonSep><dmsDegLon>\.<dmsMinLon>\.<dmsSecLon>\s?<hemiLon>
#TEST DMS 01a-dot 03.44.777N 111.10.044W
#TEST DMS 01a-dot 03.44.777 N 111.10.044 W
#TEST DMS 01a-dot 03.44.7778 N 111.10.0442 W

#RULE DMS 01b-dot \b<hemiLat>\s?<dmsDegLat>\.<dmsMinLat>\.<dmsSecLat><latlonSep><hemiLon>\s?<dmsDegLon>\.<dmsMinLon>\.<dmsSecLon>
#TEST DMS 01b-dot N03.44.777 W111.10.044
#TEST DMS 01a-dot N 03.44.777 W 111.10.044
#TEST DMS 01a-dot N 03.44.7778 W 111.10.0442

// DMS 01 with dot as a separator.
//
#RULE DMS 02a \b<degLat>\.<minLat>\.<secLat><secLatSep><hemiLat><latlonSep><degLon>\.<minLon>\.<secLon><secLonSep><hemiLon>
#TEST DMS 02a 01.44.55N 055.44.33E

#RULE DMS 02b \b<hemiLat><degLat>\.<minLat>\.<secLat><secLatSep><latlonSep><hemiLon><degLon>\.<minLon>\.<secLon><secLonSep>
#TEST DMS 02b N01.44.55 E055.44.33


// DMS with NO SEPARATORS
#RULE DMS 03a \b<dmsDegLat><dmsMinLat><dmsSecLat>\s?<hemiLat><latlonSep>*<dmsDegLon><dmsMinLon><dmsSecLon>\s?<hemiLon>
#TEST DMS 03a 421800N 1022400W
#TEST DMS 03a 5113N 00425E FAIL

#RULE DMS 03b \b<hemiLatPre><dmsDegLat><dmsMinLat><dmsSecLat><latlonSep><hemiLonPre><dmsDegLon><dmsMinLon><dmsSecLon>
#TEST DMS 03b N421800 W1022400
#TEST DMS 03b N421800W1022400
#TEST DMS 03b N421800/W1022400

// DMS NO SEPARATORS, decimal seconds. Ambiguous -- is additional precision in Fractional Minutes or fractional seconds?
#RULE DMS 04a \b<dmsDegLat><dmsMinLat><dmsSecLat><fractSecLatOpt><hemiLat><latlonSep03><dmsDegLon><dmsMinLon><dmsSecLon><fractSecLonOpt><hemiLon>
#TEST DMS 04a 4218001234N 10224001234W
#TEST DMS 04a 4218001234N - 10224001234W
#TEST DMS 04a 4218001234N x 10224001234W

#RULE DMS 04b \b<hemiLat><dmsDegLat><dmsMinLat><dmsSecLat><fractSecLatOpt><latlonSep03><hemiLon><dmsDegLon><dmsMinLon><dmsSecLon><fractSecLonOpt>
#TEST DMS 04b N4218001234 W10224001234
#TEST DMS 04b N4218001234 W10224001234




//..........................................
// Decimal Degrees
//..........................................
#CLASS DD opensextant.extractors.xcoord.DecimalDegMatch

// FORM: DD-xx, Decimal Deg, Preceding Hemisphere (a) H DD.DDDDDD° HDDD.DDDDDD°, optional deg symbol
#RULE DD 01 \b<hemiLatPre>\s?<decDegLat><degSym>?<latlonSep3><hemiLonPre>\s?<decDegLon><degSym>?
#TEST DD 01 N42.3, W102.4
#TEST DD 01 N 42.3, W 102.4
#TEST DD 01 N42.3°, W102.4°
#TEST DD 01 N98.3°, W192.4° #FAIL test out lon deg validation.
#TEST DD 01 N98.3°, W292.4° #FAIL test out lon deg validation.

// FORM: DD-xx, Decimal Deg, Postpending Hemisphere (a) DD.DDDDDD°H DDD.DDDDDD°H, optional deg symbol
#RULE DD 02 \b<decDegLat><degSym>?<hemiLat><latlonSep3><decDegLon><degSym>?<hemiLon>\b
#TEST DD 02 42.3N; 102.4W
#TEST DD 02 42.3°N; 102.4°W
#TEST DD 02 3: 42.18N 102.24W # PASS, avoid the 3:

// FORM: DD-xx, Decimal Deg, Preceding Hemisphere, +/- with deg symbol, (a) {-|+|}DD.DDDDDD° {-|+|}DDD.DDDDDD°
#RULE DD 03 \b<hemiLatSign>?<decDegLat><degSym><latlonSep3><hemiLonSign>?<decDegLon><degSym>
#TEST DD 03 +42.3°;-102.4°
#TEST DD 03 42.3° x -102.4°
#TEST DD 03 42.3° x 102.4°

// FORM: DD-xx, Decimal Deg with explicit LAT: LON: markers, both hemisphere and degree symbols are optional
#RULE DD 04 <latMarker>\s*<hemiLatPre>?<decDegLat><degSym>?<hemiLat>?<latlonSep3><lonMarker>\s*<hemiLonPre>?<decDegLon><degSym>?<hemiLon>?
#TEST DD 04 Latitude: 42.3N x Longitude: 102.3W
#TEST DD 04 Latitude: N42.3° x Longitude: W102.3°
#TEST DD 04 Latitude: 42.3 Longitude: 102.3
#TEST DD 04 Latitude= -42.3 Longitude= 102.3
#TEST DD 04 Latitude= 42.3 Longitude= 102.3
#TEST DD 04 Latitude: 42.3 Longitude: 102.3
#TEST DD 04 Latitude -42.3 Longitude 102.3
#TEST DD 04 Lat:-42.3 Lon:102.3
#TEST DD 04 Lat= -42.3 Long= 102.3

// #RULE DD 04a \w+[:=]\s*<hemiLatPre>?<decDegLat><latlonSep3><hemiLonPre>?<decDegLon>
// #TEST DD 04a COORD=-55.6, +134.700

//..........................................
// Degrees -- These patterns are so low-resolution, that for now it seems useless to allow these to run by default.
// Its approximately 100 KM x 100 KM
//..........................................

// FORM: DD-xx, Decimal Deg, Preceding Hemisphere (a) HDD° HDDD°, required deg symbol
#RULE DD 05 \b<hemiLatPre>?<degLat><degSym><latlonSep3><hemiLonPre>?<degLon><degSym>
#TEST DD 05 N42°, W102°
#TEST DD 05 N42W102 #FAIL -- hemispheres are right, but text is too short and lack of separator leads to ambiguity.
#TEST DD 05 -42°, 102°
#TEST DD 05 N 42°, W 102°
#TEST DD 05 -42°, +102°
#TEST DD 05 -42°, 102°
#TEST DD 05 -42°, 102°
#TEST DD 05 +42°, -102°

#RULE DD 06 \b<degLat><degSym>?\s?<hemiLat><latlonSep3><degLon><degSym>?\s?<hemiLon>\b
#TEST DD 06 42°N, 102°N FAIL
#TEST DD 06 42° N, 102° W
#TEST DD 06 42 N, 102 W
#TEST DD 06 42N x 102W
#TEST DD 06 00 N 130 WA FAIL

#RULE DD 07 \b<hemiLat>\s?<degLat><latlonSep3><hemiLon>\s?<degLon>\b
#TEST DD 07 N42, W102
#TEST DD 07 N42 x W102
#TEST DD 07 N 42 W 102

//#RULE DD 08 \b<hemiLatSign>?<decDegLat>,\s?<hemiLonSign>?<decDegLon>
//#TEST DD 08 text 54.67, -117 text # FAIL - no decimal longitude
//#TEST DD 08 text 54.33, -117.81 text
//#TEST DD 08 text +54.22, 117.71 text
//#TEST DD 08 text +54.22, 045.71 text
//#TEST DD 08 text +54.22, -045.71 text
//#TEST DD 08 text 54.11,117.61 text
//#TEST DD 08 YX = (54.11, 117.61) text # Coordinate tuple ()

0 comments on commit 2a77424

Please sign in to comment.