diff --git a/README.md b/README.md index 60aaf74..fe831d8 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ regex-analyzer and regex-composer ================================= -* A simple Regular Expression Analyzer for JavaScript / PHP / Python -* A simple and intuitive Regular Expression Composer for JavaScript / PHP / Python +* A simple Regular Expression Analyzer for PHP, Python, Node/JS +* A simple and intuitive Regular Expression Composer for PHP, Python, Node/JS *PHP / Python implementations in progress* @@ -13,7 +13,7 @@ These are used mostly as parts of other projects but uploaded here as standalone The analyzer needs a couple of extensions but overall works good. -See /test/test.js under /test folder for examples of how to use +See /test/js/test.js under /test folder for examples of how to use **RegExAnalyzer Live Example:** @@ -26,42 +26,61 @@ See /test/test.js under /test folder for examples of how to use [![Live Example](/test/screenshot2.png)](https://foo123.github.com/examples/regex-composer/) -**RegExComposer Example:** (see /test/test.js) +**RegExComposer Example:** (see /test/js/test.js) ```javascript - -// eg. in node - -var Composer = require('../build/regexcomposer.js').RegExComposer; -var outregex = new Composer() +var echo = console.log; + +echo("Testing Composer"); +echo("================"); + +var Composer = require('../../src/js/regexcomposer.js'); +var identifierSubRegex = new Composer( ) + + .characterGroup( ) + .characters( '_' ) + .range( 'a', 'z' ) + .end( ) + + .characterGroup( ) + .characters( '_' ) + .range( 'a', 'z' ) + .range( '0', '9' ) + .end( ) + + .zeroOrMore( ) + + .partial( ); + +var outregex = new Composer( ) - .startOfLine() + .startOfLine( ) - .either() + .either( ) - .characterGroup(false) - .characters('a', 'b', 'c', '.') - .range('d', 'f') - .end() + .sub( identifierSubRegex ) - .match('**aabb**') + .match( '**aabb**' ) - .any() + .any( ) - .space() + .space( ) - .digit(false).oneOrMore() + .digit( false ).oneOrMore( ) - .end() + .end( ) - .zeroOrMore(false) + .zeroOrMore( false ) - .endOfLine() + .endOfLine( ) - .compose('i'); + .compose( 'i' ); +echo("Partial: " + identifierSubRegex); echo("Composed: " + outregex.toString()); -echo("Expected: " + "/^([^abc\\.d-f]|\\*\\*aabb\\*\\*|.|\\s|\\D+)*?$/i"); +echo("Expected: " + "/^([_a-z][_a-z0-9]*|\\*\\*aabb\\*\\*|.|\\s|\\D+)*?$/i"); +echo("================"); +echo(); ``` diff --git a/build-min.bat b/build-min.bat deleted file mode 100644 index c8da7ca..0000000 --- a/build-min.bat +++ /dev/null @@ -1,18 +0,0 @@ -@echo off - -rem ################################################### -rem # -rem # The buildtools repository is at: -rem # https://github.com/foo123/scripts/buildtools -rem # -rem ################################################### - -rem to use the python build tool do: -rem python %BUILDTOOLS%\build.py --deps ".\dependencies-min" - -rem to use the php build tool do: -rem php -f %BUILDTOOLS%\build.php -- --deps=".\dependencies-min" - -rem to use the node build tool do: -call node %BUILDTOOLS%\build.js --deps ".\dependencies-min" -call node %BUILDTOOLS%\build.js --deps ".\dependencies-composer-min" diff --git a/build-min.sh b/build-min.sh deleted file mode 100644 index 8ce0703..0000000 --- a/build-min.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env sh - -################################################### -# -# The buildtools repository is at: -# https://github.com/foo123/scripts/buildtools -# -################################################### - -# to use the python build tool do: -python ../scripts/buildtools/build.py --deps "./dependencies-min" -python ../scripts/buildtools/build.py --deps "./dependencies-composer-min" - -# to use the php build tool do: -# php -f ../scripts/buildtools/build.php -- --deps="./dependencies-min" - -# to use the node build tool do: -# node ../scripts/buildtools/build.js --deps "./dependencies-min" diff --git a/build.bat b/build.bat deleted file mode 100644 index de654da..0000000 --- a/build.bat +++ /dev/null @@ -1,18 +0,0 @@ -@echo off - -rem ################################################### -rem # -rem # The buildtools repository is at: -rem # https://github.com/foo123/scripts/buildtools -rem # -rem ################################################### - -rem to use the python build tool do: -rem python %BUILDTOOLS%\build.py --deps ".\dependencies" - -rem to use the php build tool do: -rem php -f %BUILDTOOLS%\build.php -- --deps=".\dependencies" - -rem to use the node build tool do: -call node %BUILDTOOLS%\build.js --deps ".\dependencies" -call node %BUILDTOOLS%\build.js --deps ".\dependencies-composer" diff --git a/build.sh b/build.sh deleted file mode 100644 index 5174fd8..0000000 --- a/build.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env sh - -################################################### -# -# The buildtools repository is at: -# https://github.com/foo123/scripts/buildtools -# -################################################### - -# to use the python build tool do: -python ../scripts/buildtools/build.py --deps "./dependencies" -python ../scripts/buildtools/build.py --deps "./dependencies-composer" - -# to use the php build tool do: -# php -f ../scripts/buildtools/build.php -- --deps="./dependencies" - -# to use the node build tool do: -# node ../scripts/buildtools/build.js --deps "./dependencies" diff --git a/build/js/regexanalyzer.js b/build/js/regexanalyzer.js deleted file mode 100644 index 38a4214..0000000 --- a/build/js/regexanalyzer.js +++ /dev/null @@ -1,1183 +0,0 @@ -/** -* -* RegExAnalyzer -* @version: 0.4 -* -* A simple Regular Expression Analyzer in JavaScript -* https://github.com/foo123/regex-analyzer -* -**/!function( root, name, factory ) { - "use strict"; - - // - // export the module, umd-style (no other dependencies) - var isCommonJS = ("object" === typeof(module)) && module.exports, - isAMD = ("function" === typeof(define)) && define.amd, m; - - // CommonJS, node, etc.. - if ( isCommonJS ) - module.exports = (module.$deps = module.$deps || {})[ name ] = module.$deps[ name ] || (factory.call( root, {NODE:module} ) || 1); - - // AMD, requireJS, etc.. - else if ( isAMD && ("function" === typeof(require)) && ("function" === typeof(require.specified)) && require.specified(name) ) - define( name, ['require', 'exports', 'module'], function( require, exports, module ){ return factory.call( root, {AMD:module} ); } ); - - // browser, web worker, etc.. + AMD, other loaders - else if ( !(name in root) ) - (root[ name ] = (m=factory.call( root, {} ) || 1)) && isAMD && define( name, [], function( ){ return m; } ); - - -}( /* current root */ this, - /* module name */ "RegExAnalyzer", - /* module factory */ function( exports ) { - - /* main code starts here */ - -/** -* -* RegExAnalyzer -* @version: 0.4 -* -* A simple Regular Expression Analyzer in JavaScript -* https://github.com/foo123/regex-analyzer -* -**/ -var undef = undefined, - escapeChar = '\\', - - repeatsRegex = /^\{\s*(\d+)\s*,?\s*(\d+)?\s*\}/, - - unicodeRegex = /^u([0-9a-fA-F]{4})/, - - hexRegex = /^x([0-9a-fA-F]{2})/, - - specialChars = { - "." : "MatchAnyChar", - "|" : "MatchEither", - "?" : "MatchZeroOrOne", - "*" : "MatchZeroOrMore", - "+" : "MatchOneOrMore", - "^" : "MatchStart", - "$" : "MatchEnd", - "{" : "StartRepeats", - "}" : "EndRepeats", - "(" : "StartGroup", - ")" : "EndGroup", - "[" : "StartCharGroup", - "]" : "EndCharGroup" - }, - - /* - http://www.javascriptkit.com/javatutors/redev2.shtml - - \f matches form-feed. - \r matches carriage return. - \n matches linefeed. - \t matches horizontal tab. - \v matches vertical tab. - \0 matches NUL character. - [\b] matches backspace. - \s matches whitespace (short for [\f\n\r\t\v\u00A0\u2028\u2029]). - \S matches anything but a whitespace (short for [^\f\n\r\t\v\u00A0\u2028\u2029]). - \w matches any alphanumerical character (word characters) including underscore (short for [a-zA-Z0-9_]). - \W matches any non-word characters (short for [^a-zA-Z0-9_]). - \d matches any digit (short for [0-9]). - \D matches any non-digit (short for [^0-9]). - \b matches a word boundary (the position between a word and a space). - \B matches a non-word boundary (short for [^\b]). - \cX matches a control character. E.g: \cm matches control-M. - \xhh matches the character with two characters of hexadecimal code hh. - \uhhhh matches the Unicode character with four characters of hexadecimal code hhhh. - */ - specialCharsEscaped = { - "\\" : "EscapeChar", - "/" : "/", - "0" : "NULChar", - "f" : "FormFeed", - "n" : "LineFeed", - "r" : "CarriageReturn", - "t" : "HorizontalTab", - "v" : "VerticalTab", - "b" : "MatchWordBoundary", - "B" : "MatchNonWordBoundary", - "s" : "MatchSpaceChar", - "S" : "MatchNonSpaceChar", - "w" : "MatchWordChar", - "W" : "MatchNonWordChar", - "d" : "MatchDigitChar", - "D" : "MatchNonDigitChar" - }, - Obj = Object, Arr = Array, - to_string = Obj.prototype.toString, - - rnd = function(a,b){ return Math.round((b-a)*Math.random()+a); }, - - // http://stackoverflow.com/questions/12376870/create-an-array-of-characters-from-specified-range - getCharRange = function(first, last) { - if ( first && ( first instanceof Arr || "[object Array]" == to_string.call(first) ) ) - { - last = first[1]; - first = first[0]; - } - var ch, chars, start = first.charCodeAt(0), end = last.charCodeAt(0); - - if ( end == start ) return [ String.fromCharCode( start ) ]; - - chars = []; - for (ch = start; ch <= end; ++ch) - chars.push( String.fromCharCode( ch ) ); - - return chars; - }, - - SPACES = " \r\n\t\v".split(""), - PUNCTS = "~!@#$%^&*()-+=[]{}\\|;:,./<>?".split(""), - DIGITS = "0123456789".split(""), - ALPHAS = ["_"].concat(getCharRange("a", "z")).concat(getCharRange("A", "Z")), - ALL = SPACES.concat(PUNCTS).concat(DIGITS).concat(ALPHAS), - punct = function( ){ return PUNCTS[rnd(0, PUNCTS.length-1)]; }, - space = function( positive ){ - return false !== positive - ? SPACES[rnd(0, SPACES.length-1)] - : [punct(), digit(), alpha()][rnd(0,2)] - ; - }, - digit = function( positive ){ - return false !== positive - ? DIGITS[rnd(0, DIGITS.length-1)] - : [punct(), space(), alpha()][rnd(0,2)] - ; - }, - alpha = function( positive ){ - return false !== positive - ? ALPHAS[rnd(0, ALPHAS.length-1)] - : [punct(), space(), digit()][rnd(0,2)] - ; - }, - word = function( positive ){ - return false !== positive - ? ["_"].concat(ALPHAS).concat(DIGITS)[rnd(0, ALPHAS.length+DIGITS.length)] - : [punct(), space()][rnd(0,1)] - ; - }, - character = function( chars, positive ){ - if ( false !== positive ) return chars.length ? chars[rnd(0, chars.length-1)] : ''; - var choices = ALL.filter(function(c){ return 0 > chars.indexOf(c); }); - return choices.length ? choices[rnd(0, choices.length-1)] : ''; - }, - - concat = function(p1, p2) { - if ( p2 && ( p2 instanceof Arr || "[object Array]" == to_string.call(p2) ) ) - { - for (var p=0, l=p2.length; p= l || !p || "Quantifier" != p.type || - ( !p.flags.MatchZeroOrMore && !p.flags.MatchZeroOrOne && "0"!=p.flags.MatchMinimum ) - ); - while ( !done ) - { - tmp = getPeekChars( p.part ); - peek = concat( peek, tmp.peek ); - negativepeek = concat( negativepeek, tmp.negativepeek ); - - i++; - p = part.part[i]; - - done = ( - i >= l || !p || "Quantifier" != p.type || - ( !p.flags.MatchZeroOrMore && !p.flags.MatchZeroOrOne && "0"!=p.flags.MatchMinimum ) - ); - } - if ( i < l ) - { - p = part.part[i]; - - if ("Special" == p.type && ('^'==p.part || '$'==p.part)) p = part.part[i+1] || null; - - if (p && "Quantifier" == p.type) p = p.part; - - if (p) - { - tmp = getPeekChars( p ); - peek = concat( peek, tmp.peek ); - negativepeek = concat( negativepeek, tmp.negativepeek ); - } - } - } - - else if ( "CharGroup" == type ) - { - current = ( part.flags.NotMatch ) ? negativepeek : peek; - - for (i=0, l=part.part.length; i 0 ) - { - repeat--; - sample += generate( p.part, isCaseInsensitive ); - } - } - else - { - sample += generate( p, isCaseInsensitive ); - } - } - } - - else if ( "CharGroup" == type ) - { - var chars = [], ptype; - - for (i=0, l=part.part.length; i 1 ) - { - sequence.push( { part: prev.part.slice(0, -1), flags: {}, type: "String" } ); - prev.part = prev.part.slice(-1); - } - sequence.push( { part: prev, flags: flag, type: "Quantifier" } ); - } - - // quantifiers - else if ( '*' == ch || '+' == ch || '?' == ch ) - { - if ( word.length ) - { - sequence.push( { part: word, flags: {}, type: "String" } ); - word = ''; - } - flag = {}; - flag[ specialChars[ch] ] = 1; - if ( '?' == self.regex.charAt(self.pos) ) - { - flag[ "isGreedy" ] = 0; - self.pos++; - } - else - { - flag[ "isGreedy" ] = 1; - } - var prev = sequence.pop(); - if ( "String" == prev.type && prev.part.length > 1 ) - { - sequence.push( { part: prev.part.slice(0, -1), flags: {}, type: "String" } ); - prev.part = prev.part.slice(-1); - } - sequence.push( { part: prev, flags: flag, type: "Quantifier" } ); - } - - // special characters like ^, $, ., etc.. - else if ( specialChars[ch] ) - { - if ( word.length ) - { - sequence.push( { part: word, flags: {}, type: "String" } ); - word = ''; - } - flag = {}; - flag[ specialChars[ch] ] = 1; - sequence.push( { part: ch, flags: flag, type: "Special" } ); - } - - else - { - word += ch; - } - } - } - if ( word.length ) - { - sequence.push( { part: word, flags: {}, type: "String" } ); - word = ''; - } - if ( alternation.length ) - { - alternation.push( { part: sequence, flags: {}, type: "Sequence" } ); - sequence = []; - flag = {}; - flag[ specialChars['|'] ] = 1; - return { part: { part: alternation, flags: flag, type: "Alternation" }, flags: flags, type: "Group" }; - } - else - { - return { part: { part: sequence, flags: {}, type: "Sequence" }, flags: flags, type: "Group" }; - } - }, - - chargroup = function( self ) { - var sequence = [], chars = [], flags = {}, flag, ch, prevch, range, isRange = false, match, isUnicode, escaped = false; - - if ( '^' == self.regex.charAt( self.pos ) ) - { - flags[ "NotMatch" ] = 1; - self.pos++; - } - - while ( self.pos < self.regex.length ) - { - isUnicode = false; - prevch = ch; - ch = self.regex.charAt( self.pos++ ); - - escaped = (escapeChar == ch) ? true : false; - if ( escaped ) ch = self.regex.charAt( self.pos++ ); - - if ( escaped ) - { - // unicode character - if ( 'u' == ch ) - { - match = unicodeRegex.exec( self.regex.substr( self.pos-1 ) ); - self.pos += match[0].length-1; - ch = String.fromCharCode(parseInt(match[1], 16)); - isUnicode = true; - } - - // hex character - else if ( 'x' == ch ) - { - match = hexRegex.exec( self.regex.substr( self.pos-1 ) ); - self.pos += match[0].length-1; - ch = String.fromCharCode(parseInt(match[1], 16)); - isUnicode = true; - } - } - - if ( isRange ) - { - if ( chars.length ) - { - sequence.push( { part: chars, flags: {}, type: "Chars" } ); - chars = []; - } - range[1] = ch; - isRange = false; - sequence.push( { part: range, flags: {}, type: "CharRange" } ); - } - else - { - if ( escaped ) - { - if ( !isUnicode && specialCharsEscaped[ch] && '/' != ch) - { - if ( chars.length ) - { - sequence.push( { part: chars, flags: {}, type: "Chars" } ); - chars = []; - } - flag = {}; - flag[ specialCharsEscaped[ch] ] = 1; - sequence.push( { part: ch, flags: flag, type: "Special" } ); - } - - else - { - chars.push( ch ); - } - } - - else - { - // end of char group - if ( ']' == ch ) - { - if ( chars.length ) - { - sequence.push( { part: chars, flags: {}, type: "Chars" } ); - chars = []; - } - return { part: sequence, flags: flags, type: "CharGroup" }; - } - - else if ( '-' == ch ) - { - range = [prevch, '']; - chars.pop(); - isRange = true; - } - - else - { - chars.push( ch ); - } - } - } - } - if ( chars.length ) - { - sequence.push( { part: chars, flags: {}, type: "Chars" } ); - chars = []; - } - return { part: sequence, flags: flags, type: "CharGroup" }; - }, - - analyze = function( regex ) { - var self = {pos: 0, groupIndex: 0, regex: regex}; - var ch, word = '', alternation = [], sequence = [], flag, match, escaped = false; - - while ( self.pos < self.regex.length ) - { - ch = self.regex.charAt( self.pos++ ); - - // \\abc - escaped = (escapeChar == ch) ? true : false; - if ( escaped ) ch = self.regex.charAt( self.pos++ ); - - if ( escaped ) - { - // unicode character - if ( 'u' == ch ) - { - if ( word.length ) - { - sequence.push( { part: word, flags: {}, type: "String" } ); - word = ''; - } - match = unicodeRegex.exec( self.regex.substr( self.pos-1 ) ); - self.pos += match[0].length-1; - sequence.push( { part: match[0], flags: { "Char": String.fromCharCode(parseInt(match[1], 16)), "Code": match[1] }, type: "UnicodeChar" } ); - } - - // hex character - else if ( 'x' == ch ) - { - if ( word.length ) - { - sequence.push( { part: word, flags: {}, type: "String" } ); - word = ''; - } - match = hexRegex.exec( self.regex.substr( self.pos-1 ) ); - self.pos += match[0].length-1; - sequence.push( { part: match[0], flags: { "Char": String.fromCharCode(parseInt(match[1], 16)), "Code": match[1] }, type: "HexChar" } ); - } - - else if ( specialCharsEscaped[ch] && '/' != ch) - { - if ( word.length ) - { - sequence.push( { part: word, flags: {}, type: "String" } ); - word = ''; - } - flag = {}; - flag[ specialCharsEscaped[ch] ] = 1; - sequence.push( { part: ch, flags: flag, type: "Special" } ); - } - - else - { - word += ch; - } - } - - else - { - // parse alternation - if ( '|' == ch ) - { - if ( word.length ) - { - sequence.push( { part: word, flags: {}, type: "String" } ); - word = ''; - } - alternation.push( { part: sequence, flags: {}, type: "Sequence" } ); - sequence = []; - } - - // parse character group - else if ( '[' == ch ) - { - if ( word.length ) - { - sequence.push( { part: word, flags: {}, type: "String" } ); - word = ''; - } - sequence.push( chargroup( self ) ); - } - - // parse sub-group - else if ( '(' == ch ) - { - if ( word.length ) - { - sequence.push( { part: word, flags: {}, type: "String" } ); - word = ''; - } - sequence.push( subgroup( self ) ); - } - - // parse num repeats - else if ( '{' == ch ) - { - if ( word.length ) - { - sequence.push( { part: word, flags: {}, type: "String" } ); - word = ''; - } - match = repeatsRegex.exec( self.regex.substr( self.pos-1 ) ); - self.pos += match[0].length-1; - flag = { part: match[0], "MatchMinimum": match[1], "MatchMaximum": match[2] || "unlimited" }; - flag[ specialChars[ch] ] = 1; - if ( '?' == self.regex.charAt(self.pos) ) - { - flag[ "isGreedy" ] = 0; - self.pos++; - } - else - { - flag[ "isGreedy" ] = 1; - } - var prev = sequence.pop(); - if ( "String" == prev.type && prev.part.length > 1 ) - { - sequence.push( { part: prev.part.slice(0, -1), flags: {}, type: "String" } ); - prev.part = prev.part.slice(-1); - } - sequence.push( { part: prev, flags: flag, type: "Quantifier" } ); - } - - // quantifiers - else if ( '*' == ch || '+' == ch || '?' == ch ) - { - if ( word.length ) - { - sequence.push( { part: word, flags: {}, type: "String" } ); - word = ''; - } - flag = {}; - flag[ specialChars[ch] ] = 1; - if ( '?' == self.regex.charAt(self.pos) ) - { - flag[ "isGreedy" ] = 0; - self.pos++; - } - else - { - flag[ "isGreedy" ] = 1; - } - var prev = sequence.pop(); - if ( "String" == prev.type && prev.part.length > 1 ) - { - sequence.push( { part: prev.part.slice(0, -1), flags: {}, type: "String" } ); - prev.part = prev.part.slice(-1); - } - sequence.push( { part: prev, flags: flag, type: "Quantifier" } ); - } - - // special characters like ^, $, ., etc.. - else if ( specialChars[ch] ) - { - if ( word.length ) - { - sequence.push( { part: word, flags: {}, type: "String" } ); - word = ''; - } - flag = {}; - flag[ specialChars[ch] ] = 1; - sequence.push( { part: ch, flags: flag, type: "Special" } ); - } - - else - { - word += ch; - } - } - } - - if ( word.length ) - { - sequence.push( { part: word, flags: {}, type: "String" } ); - word = ''; - } - - if ( alternation.length ) - { - alternation.push( { part: sequence, flags: {}, type: "Sequence" } ); - sequence = []; - flag = {}; - flag[ specialChars['|'] ] = 1; - parts = { part: alternation, flags: flag, type: "Alternation" }; - } - else - { - parts = { part: sequence, flags: {}, type: "Sequence" }; - } - return parts; - } -; - -// A simple (js-flavored) regular expression analyzer -var Analyzer = function( regex, delim ) { - if ( regex ) this.regex( regex, delim ); -}; -Analyzer.VERSION = "0.4"; -Analyzer.getCharRange = getCharRange; -Analyzer.prototype = { - - constructor: Analyzer, - - $regex: null, - $flags: null, - $parts: null, - $needsRefresh: true, - - dispose: function( ) { - var self = this; - self.$regex = null; - self.$flags = null; - self.$parts = null; - return self; - }, - - regex: function( regex, delim ) { - var self = this; - if ( regex ) - { - delim = delim || '/'; - var flags = {}, r = regex.toString( ), l = r.length, ch = r.charAt(l-1); - - // parse regex flags - while ( delim !== ch ) - { - flags[ ch ] = 1; - r = r.substr(0, l-1); - l = r.length; - ch = r.charAt(l-1); - } - // remove regex delimiters - if ( delim == r.charAt(0) && delim == r.charAt(l-1) ) r = r.substr(1, l-2); - - if ( self.$regex !== r ) self.$needsRefresh = true; - self.$regex = r; self.$flags = flags; - } - return self; - }, - - analyze: function( ) { - var self = this; - if ( self.$needsRefresh ) - { - self.$parts = analyze( self.$regex ); - self.$needsRefresh = false; - } - return self; - }, - - getParts: function( ) { - var self = this; - if ( self.$needsRefresh ) self.analyze( ); - return self.$parts; - }, - - // experimental feature - generateSample: function( ) { - var self = this; - if ( self.$needsRefresh ) self.analyze( ); - return generate( self.$parts, self.$flags && self.$flags.i ); - }, - - // experimental feature - getPeekChars: function( ) { - var self = this, isCaseInsensitive, - peek, n, c, p, cases; - - if ( self.$needsRefresh ) self.analyze( ); - - peek = getPeekChars( self.$parts ); - isCaseInsensitive = self.$flags && self.$flags.i; - - for (n in peek) - { - cases = {}; - // either peek or negativepeek - p = peek[n]; - for (c in p) - { - if ('\\d' == c) - { - delete p[c]; - cases = concat(cases, getCharRange('0', '9')); - } - - else if ('\\s' == c) - { - delete p[c]; - cases = concat(cases, ['\f','\n','\r','\t','\v','\u00A0','\u2028','\u2029']); - } - - else if ('\\w' == c) - { - delete p[c]; - cases = concat(cases, ['_'].concat(getCharRange('0', '9')).concat(getCharRange('a', 'z')).concat(getCharRange('A', 'Z'))); - } - - else if ('\\.' == c) - { - delete p[c]; - cases[ specialChars['.'] ] = 1; - } - - /*else if ('\\^' == c) - { - delete p[c]; - cases[ specialChars['^'] ] = 1; - } - - else if ('\\$' == c) - { - delete p[c]; - cases[ specialChars['$'] ] = 1; - }*/ - - else if ( '\\' != c.charAt(0) && isCaseInsensitive ) - { - cases[ c.toLowerCase() ] = 1; - cases[ c.toUpperCase() ] = 1; - } - - else if ( '\\' == c.charAt(0) ) - { - delete p[c]; - } - } - peek[n] = concat(p, cases); - } - return peek; - } -}; - - -exports['RegExAnalyzer'] = Analyzer; - - /* main code ends here */ - /* export the module */ - return exports["RegExAnalyzer"]; -}); \ No newline at end of file diff --git a/build/js/regexanalyzer.min.js b/build/js/regexanalyzer.min.js deleted file mode 100644 index 648f097..0000000 --- a/build/js/regexanalyzer.min.js +++ /dev/null @@ -1,9 +0,0 @@ -/** -* -* RegExAnalyzer -* @version: 0.4 -* -* A simple Regular Expression Analyzer in JavaScript -* https://github.com/foo123/regex-analyzer -* -**/!function(e,t,r){"use strict";var a,p="object"==typeof module&&module.exports,s="function"==typeof define&&define.amd;p?module.exports=(module.$deps=module.$deps||{})[t]=module.$deps[t]||r.call(e,{NODE:module})||1:s&&"function"==typeof require&&"function"==typeof require.specified&&require.specified(t)?define(t,["require","exports","module"],function(t,a,p){return r.call(e,{AMD:p})}):t in e||(e[t]=a=r.call(e,{})||1)&&s&&define(t,[],function(){return a})}(this,"RegExAnalyzer",function(e){var t="\\",r=/^\{\s*(\d+)\s*,?\s*(\d+)?\s*\}/,a=/^u([0-9a-fA-F]{4})/,p=/^x([0-9a-fA-F]{2})/,s={".":"MatchAnyChar","|":"MatchEither","?":"MatchZeroOrOne","*":"MatchZeroOrMore","+":"MatchOneOrMore","^":"MatchStart",$:"MatchEnd","{":"StartRepeats","}":"EndRepeats","(":"StartGroup",")":"EndGroup","[":"StartCharGroup","]":"EndCharGroup"},n={"\\":"EscapeChar","/":"/",0:"NULChar",f:"FormFeed",n:"LineFeed",r:"CarriageReturn",t:"HorizontalTab",v:"VerticalTab",b:"MatchWordBoundary",B:"MatchNonWordBoundary",s:"MatchSpaceChar",S:"MatchNonSpaceChar",w:"MatchWordChar",W:"MatchNonWordChar",d:"MatchDigitChar",D:"MatchNonDigitChar"},h=Object,l=Array,g=h.prototype.toString,o=function(e,t){return Math.round((t-e)*Math.random()+e)},i=function(e,t){e&&(e instanceof l||"[object Array]"==g.call(e))&&(t=e[1],e=e[0]);var r,a,p=e.charCodeAt(0),s=t.charCodeAt(0);if(s==p)return[String.fromCharCode(p)];for(a=[],r=p;s>=r;++r)a.push(String.fromCharCode(r));return a},f=" \r\n ".split(""),u="~!@#$%^&*()-+=[]{}\\|;:,./<>?".split(""),c="0123456789".split(""),y=["_"].concat(i("a","z")).concat(i("A","Z")),d=f.concat(u).concat(c).concat(y),C=function(){return u[o(0,u.length-1)]},S=function(e){return!1!==e?f[o(0,f.length-1)]:[C(),x(),M()][o(0,2)]},x=function(e){return!1!==e?c[o(0,c.length-1)]:[C(),S(),M()][o(0,2)]},M=function(e){return!1!==e?y[o(0,y.length-1)]:[C(),S(),x()][o(0,2)]},A=function(e){return!1!==e?["_"].concat(y).concat(c)[o(0,y.length+c.length)]:[C(),S()][o(0,1)]},m=function(e,t){if(!1!==t)return e.length?e[o(0,e.length-1)]:"";var r=d.filter(function(t){return 0>e.indexOf(t)});return r.length?r[o(0,r.length-1)]:""},v=function(e,t){if(t&&(t instanceof l||"[object Array]"==g.call(t)))for(var r=0,a=t.length;a>r;r++)e[t[r]]=1;else for(var r in t)e[r]=1;return e},$=function(e,t){return t?(e.charAt&&(e=e.split("")),e=e.map(function(e){return o(0,1)?e.toLowerCase():e.toUpperCase()}),t||(e=e.join("")),e):o(0,1)?e.toLowerCase():e.toUpperCase()},G=function(e){var t,r,a,p,s,n,h,l,g={},o={};if(h=e.type,"Alternation"==h)for(a=0,p=e.part.length;p>a;a++)s=G(e.part[a]),g=v(g,s.peek),o=v(o,s.negativepeek);else if("Group"==h)s=G(e.part),g=v(g,s.peek),o=v(o,s.negativepeek);else if("Sequence"==h){for(a=0,p=e.part.length,r=e.part[a],n=a>=p||!r||"Quantifier"!=r.type||!r.flags.MatchZeroOrMore&&!r.flags.MatchZeroOrOne&&"0"!=r.flags.MatchMinimum;!n;)s=G(r.part),g=v(g,s.peek),o=v(o,s.negativepeek),a++,r=e.part[a],n=a>=p||!r||"Quantifier"!=r.type||!r.flags.MatchZeroOrMore&&!r.flags.MatchZeroOrOne&&"0"!=r.flags.MatchMinimum;p>a&&(r=e.part[a],"Special"!=r.type||"^"!=r.part&&"$"!=r.part||(r=e.part[a+1]||null),r&&"Quantifier"==r.type&&(r=r.part),r&&(s=G(r),g=v(g,s.peek),o=v(o,s.negativepeek)))}else if("CharGroup"==h)for(t=e.flags.NotMatch?o:g,a=0,p=e.part.length;p>a;a++)r=e.part[a],l=r.type,"Chars"==l?t=v(t,r.part):"CharRange"==l?t=v(t,i(r.part)):"UnicodeChar"==l||"HexChar"==l?t[r.flags.Char]=1:"Special"==l&&("D"==r.part?e.flags.NotMatch?g["\\d"]=1:o["\\d"]=1:"W"==r.part?e.flags.NotMatch?g["\\w"]=1:o["\\W"]=1:"S"==r.part?e.flags.NotMatch?g["\\s"]=1:o["\\s"]=1:t["\\"+r.part]=1);else"String"==h?g[e.part.charAt(0)]=1:"Special"!=h||e.flags.MatchStart||e.flags.MatchEnd?("UnicodeChar"==h||"HexChar"==h)&&(g[e.flags.Char]=1):"D"==e.part?o["\\d"]=1:"W"==e.part?o["\\W"]=1:"S"==e.part?o["\\s"]=1:g["\\"+e.part]=1;return{peek:g,negativepeek:o}},O=function(e,t){var r,a,p,s,n="";if(s=e.type,"Alternation"==s)n+=O(e.part[o(0,e.part.length-1)],t);else if("Group"==s)n+=O(e.part,t);else if("Sequence"==s){var h,l,g;for(p=e.part.length,r=e.part[a],a=0;p>a;a++)if(r=e.part[a])if(h=1,"Quantifier"==r.type)for(r.flags.MatchZeroOrMore?h=o(0,10):r.flags.MatchZeroOrOne?h=o(0,1):r.flags.MatchOneOrMore?h=o(1,11):(l=parseInt(r.flags.MatchMinimum,10),g=parseInt(r.flags.MatchMaximum,10),h=o(l,isNaN(g)?l+10:g));h>0;)h--,n+=O(r.part,t);else n+=O(r,t)}else if("CharGroup"==s){var f,u=[];for(a=0,p=e.part.length;p>a;a++)r=e.part[a],f=r.type,"Chars"==f?u=u.concat(t?$(r.part,!0):r.part):"CharRange"==f?u=u.concat(t?$(i(r.part),!0):i(r.part)):"UnicodeChar"==f||"HexChar"==f?u.push(t?$(r.flags.Char):r.flags.Char):"Special"==f&&u.push("D"==r.part?x(!1):"W"==r.part?A(!1):"S"==r.part?S(!1):"d"==r.part?x():"w"==r.part?A():"s"==r.part?S():"\\"+r.part);n+=m(u,!e.flags.NotMatch)}else"String"==s?n+=t?$(e.part):e.part:"Special"!=s||e.flags.MatchStart||e.flags.MatchEnd?("UnicodeChar"==s||"HexChar"==s)&&(n+=t?$(e.flags.Char):e.flags.Char):n+="D"==e.part?x(!1):"W"==e.part?A(!1):"S"==e.part?S(!1):"d"==e.part?x():"w"==e.part?A():"s"==e.part?S():"\\"+e.part;return n},b=function(e){var h,l,g,o="",i=[],f=[],u={},c=!1,y=e.regex.substr(e.pos,2);for("?:"==y?(u.NotCaptured=1,e.pos+=2):"?="==y?(u.LookAhead=1,e.pos+=2):"?!"==y&&(u.NegativeLookAhead=1,e.pos+=2),u.GroupIndex=++e.groupIndex;e.pos1&&(f.push({part:d.part.slice(0,-1),flags:{},type:"String"}),d.part=d.part.slice(-1)),f.push({part:d,flags:l,type:"Quantifier"})}else if("*"==h||"+"==h||"?"==h){o.length&&(f.push({part:o,flags:{},type:"String"}),o=""),l={},l[s[h]]=1,"?"==e.regex.charAt(e.pos)?(l.isGreedy=0,e.pos++):l.isGreedy=1;var d=f.pop();"String"==d.type&&d.part.length>1&&(f.push({part:d.part.slice(0,-1),flags:{},type:"String"}),d.part=d.part.slice(-1)),f.push({part:d,flags:l,type:"Quantifier"})}else s[h]?(o.length&&(f.push({part:o,flags:{},type:"String"}),o=""),l={},l[s[h]]=1,f.push({part:h,flags:l,type:"Special"})):o+=h}return o.length&&(f.push({part:o,flags:{},type:"String"}),o=""),i.length?(i.push({part:f,flags:{},type:"Sequence"}),f=[],l={},l[s["|"]]=1,{part:{part:i,flags:l,type:"Alternation"},flags:u,type:"Group"}):{part:{part:f,flags:{},type:"Sequence"},flags:u,type:"Group"}},R=function(e){var r,s,h,l,g,o,i=[],f=[],u={},c=!1,y=!1;for("^"==e.regex.charAt(e.pos)&&(u.NotMatch=1,e.pos++);e.pos1&&(u.push({part:y.part.slice(0,-1),flags:{},type:"String"}),y.part=y.part.slice(-1)),u.push({part:y,flags:l,type:"Quantifier"})}else if("*"==h||"+"==h||"?"==h){i.length&&(u.push({part:i,flags:{},type:"String"}),i=""),l={},l[s[h]]=1,"?"==o.regex.charAt(o.pos)?(l.isGreedy=0,o.pos++):l.isGreedy=1;var y=u.pop();"String"==y.type&&y.part.length>1&&(u.push({part:y.part.slice(0,-1),flags:{},type:"String"}),y.part=y.part.slice(-1)),u.push({part:y,flags:l,type:"Quantifier"})}else s[h]?(i.length&&(u.push({part:i,flags:{},type:"String"}),i=""),l={},l[s[h]]=1,u.push({part:h,flags:l,type:"Special"})):i+=h;return i.length&&(u.push({part:i,flags:{},type:"String"}),i=""),f.length?(f.push({part:u,flags:{},type:"Sequence"}),u=[],l={},l[s["|"]]=1,parts={part:f,flags:l,type:"Alternation"}):parts={part:u,flags:{},type:"Sequence"},parts},q=function(e,t){e&&this.regex(e,t)};return q.VERSION="0.4",q.getCharRange=i,q.prototype={constructor:q,$regex:null,$flags:null,$parts:null,$needsRefresh:!0,dispose:function(){var e=this;return e.$regex=null,e.$flags=null,e.$parts=null,e},regex:function(e,t){var r=this;if(e){t=t||"/";for(var a={},p=e.toString(),s=p.length,n=p.charAt(s-1);t!==n;)a[n]=1,p=p.substr(0,s-1),s=p.length,n=p.charAt(s-1);t==p.charAt(0)&&t==p.charAt(s-1)&&(p=p.substr(1,s-2)),r.$regex!==p&&(r.$needsRefresh=!0),r.$regex=p,r.$flags=a}return r},analyze:function(){var e=this;return e.$needsRefresh&&(e.$parts=N(e.$regex),e.$needsRefresh=!1),e},getParts:function(){var e=this;return e.$needsRefresh&&e.analyze(),e.$parts},generateSample:function(){var e=this;return e.$needsRefresh&&e.analyze(),O(e.$parts,e.$flags&&e.$flags.i)},getPeekChars:function(){var e,t,r,a,p,n,h=this;h.$needsRefresh&&h.analyze(),t=G(h.$parts),e=h.$flags&&h.$flags.i;for(r in t){n={},p=t[r];for(a in p)"\\d"==a?(delete p[a],n=v(n,i("0","9"))):"\\s"==a?(delete p[a],n=v(n,["\f","\n","\r"," "," "," ","\u2028","\u2029"])):"\\w"==a?(delete p[a],n=v(n,["_"].concat(i("0","9")).concat(i("a","z")).concat(i("A","Z")))):"\\."==a?(delete p[a],n[s["."]]=1):"\\"!=a.charAt(0)&&e?(n[a.toLowerCase()]=1,n[a.toUpperCase()]=1):"\\"==a.charAt(0)&&delete p[a];t[r]=v(p,n)}return t}},e.RegExAnalyzer=q,e.RegExAnalyzer}); \ No newline at end of file diff --git a/build/js/regexcomposer.js b/build/js/regexcomposer.js deleted file mode 100644 index afc2697..0000000 --- a/build/js/regexcomposer.js +++ /dev/null @@ -1,332 +0,0 @@ -/** -* -* RegExComposer -* @version: 0.4 -* -* A simple and intuitive Regular Expression Composer in JavaScript -* https://github.com/foo123/regex-analyzer -* -**/!function( root, name, factory ) { - "use strict"; - - // - // export the module, umd-style (no other dependencies) - var isCommonJS = ("object" === typeof(module)) && module.exports, - isAMD = ("function" === typeof(define)) && define.amd, m; - - // CommonJS, node, etc.. - if ( isCommonJS ) - module.exports = (module.$deps = module.$deps || {})[ name ] = module.$deps[ name ] || (factory.call( root, {NODE:module} ) || 1); - - // AMD, requireJS, etc.. - else if ( isAMD && ("function" === typeof(require)) && ("function" === typeof(require.specified)) && require.specified(name) ) - define( name, ['require', 'exports', 'module'], function( require, exports, module ){ return factory.call( root, {AMD:module} ); } ); - - // browser, web worker, etc.. + AMD, other loaders - else if ( !(name in root) ) - (root[ name ] = (m=factory.call( root, {} ) || 1)) && isAMD && define( name, [], function( ){ return m; } ); - - -}( /* current root */ this, - /* module name */ "RegExComposer", - /* module factory */ function( exports ) { - - /* main code starts here */ - -/** -* -* RegExComposer -* @version: 0.4 -* -* A simple and intuitive Regular Expression Composer in JavaScript -* https://github.com/foo123/regex-analyzer -* -**/ -var undef = undefined, OP = Object.prototype, AP = Array.prototype, - to_string = OP.toString, - - slice = function( a ) { return AP.slice.apply(a, AP.slice.call(arguments, 1)); }, - - esc = function( s ) { return s.replace(/([.*+?^${}()|[\]\/\\\-])/g, '\\$1'); }, - - flatten = function( a ) { - var r = [], i = 0; - while (i < a.length) r = r.concat(a[i++]); - return r; - }, - - getArgs = function( args, asArray ) { - /*var a = slice(args); - if ( asArray && a[0] && - ( a[0] instanceof Array || '[object Array]' == to_string.call(a[0]) ) - ) - a = a[0];*/ - return flatten( slice( args ) ); //a; - }, - - T_SEQ = 2, T_EITHER = 4, T_GROUP = 8, T_CHARGROUP = 16 -; - - -// A simple (js-flavored) regular expression composer -var Composer = function( ) { - this.$regex = null; - this.reset( ); -}; -Composer.VERSION = "0.4"; -Composer.prototype = { - - constructor: Composer, - - $level: 0, - $regex: null, - $parts: null, - - dispose: function( ) { - var self = this; - self.$level = null; - self.$regex = null; - self.$parts = null; - return self; - }, - - reset: function( ) { - var self = this; - self.$level = 0; - self.$parts = [{part: [], type: T_SEQ, flag: ''}]; - return self; - }, - - compose: function( /* flags */ ) { - var self = this; - self.$regex = new RegExp(self.$parts[0].part.join(''), slice(arguments).join('')); - self.reset( ); - return self.$regex; - }, - - partial: function( reset ) { - var self = this, p = self.$parts[0].part.join(''); - if ( false!==reset ) self.reset( ); - return p; - }, - - repeat: function( min, max, greedy ) { - var self = this; - if ( undef === min ) return self; - var repeat = ( undef === max ) ? ('{'+min+'}') : ('{'+min+','+max+'}'); - - self.$parts[self.$level].part[self.$parts[self.$level].part.length-1] += (false===greedy) ? (repeat+'?') : repeat; - return self; - }, - - zeroOrOne: function( greedy ) { - var self = this; - self.$parts[self.$level].part[self.$parts[self.$level].part.length-1] += (false===greedy) ? '??' : '?'; - return self; - }, - - zeroOrMore: function( greedy ) { - var self = this; - self.$parts[self.$level].part[self.$parts[self.$level].part.length-1] += (false===greedy) ? '*?' : '*'; - return self; - }, - - oneOrMore: function( greedy ) { - var self = this; - self.$parts[self.$level].part[self.$parts[self.$level].part.length-1] += (false===greedy) ? '+?' : '+'; - return self; - }, - - sub: function( partialRegex, withParen ) { - var self = this; - if ( undef !== partialRegex ) - { - if ( withParen ) partialRegex = '(' + partialRegex + ')'; - self.$parts[self.$level].part.push( partialRegex ); - } - return self; - }, - - match: function( literalStr ) { - var self = this; - if ( undef !== literalStr ) - self.$parts[self.$level].part.push( esc(literalStr) ); - return self; - }, - - startOfInput: function( ) { - var self = this; - self.$parts[self.$level].part.push('^'); - return self; - }, - - endOfInput: function( ) { - var self = this; - self.$parts[self.$level].part.push('$'); - return self; - }, - - any: function( ) { - var self = this; - self.$parts[self.$level].part.push('.'); - return self; - }, - - space: function( positive ) { - var self = this; - self.$parts[self.$level].part.push((false===positive) ? '\\S' : '\\s'); - return self; - }, - - digit: function( positive ) { - var self = this; - self.$parts[self.$level].part.push((false===positive) ? '\\D' : '\\d'); - return self; - }, - - word: function( positive ) { - var self = this; - self.$parts[self.$level].part.push((false===positive) ? '\\W' : '\\w'); - return self; - }, - - boundary: function( positive ) { - var self = this; - self.$parts[self.$level].part.push((false===positive) ? '\\B' : '\\b'); - return self; - }, - - LF: function( ) { - var self = this; - self.$parts[self.$level].part.push('\\n'); - return self; - }, - - CR: function( ) { - var self = this; - self.$parts[self.$level].part.push('\\r'); - return self; - }, - - TAB: function( ) { - var self = this; - self.$parts[self.$level].part.push('\\t'); - return self; - }, - - CTRL: function( _char ) { - var self = this; - if ( _char ) self.$parts[self.$level].part.push('\\c'+_char); - return self; - }, - - backSpace: function( ) { - var self = this; - self.$parts[self.$level].part.push('[\\b]'); - return self; - }, - - backReference: function( n ) { - var self = this; - self.$parts[self.$level].part.push('\\'+parseInt(n, 10)); - return self; - }, - - characters: function( ) { - var self = this; - if ( T_CHARGROUP == self.$parts[self.$level].type ) - { - var chars = getArgs(arguments, 1).map(esc).join(''); - self.$parts[self.$level].part.push( chars ); - } - return self; - }, - - range: function( start, end ) { - var self = this; - if ( T_CHARGROUP == self.$parts[self.$level].type ) - { - if ( undef === start || undef === end ) return self; - var range = esc(start) + '-' + esc(end); - self.$parts[self.$level].part.push( range ); - } - return self; - }, - - alternate: function( ) { - var self = this; - self.$level++; - self.$parts.push({part: [], type: T_EITHER, flag: ''}); - return self; - }, - - group: function( ) { - var self = this; - self.$level++; - self.$parts.push({part: [], type: T_GROUP, flag: ''}); - return self; - }, - - nonCaptureGroup: function( ) { - var self = this; - self.$level++; - self.$parts.push({part: [], type: T_GROUP, flag: '?:'}); - return self; - }, - - lookAheadGroup: function( positive ) { - var self = this; - self.$level++; - self.$parts.push({part: [], type: T_GROUP, flag: (false===positive) ? '?!' : '?='}); - return self; - }, - - characterGroup: function( positive ) { - var self = this; - self.$level++; - self.$parts.push({part: [], type: T_CHARGROUP, flag: (false===positive) ? '^' : ''}); - return self; - }, - - end: function( ) { - var self = this, prev = self.$parts.pop() || {}, - type = prev.type, - flag = prev.flag || '', - part = prev.part || [], - level - ; - - if (0 < self.$level) - { - level = --self.$level; - - if ( (T_EITHER|T_GROUP) & type ) - self.$parts[level].part.push('(' + flag + part.join('|') + ')'); - - else if ( T_CHARGROUP & type ) - self.$parts[level].part.push('[' + flag + part.join('') + ']'); - } - return self; - } -}; -// aliases -var CP = Composer.prototype; -CP.startOfLine = CP.startOfInput; -CP.endOfLine = CP.endOfInput; -CP.subRegex = CP.sub; -CP.lineFeed = CP.LF; -CP.carriageReturn = CP.CR; -CP.tabulate = CP.tab = CP.TAB; -CP.control = CP.CTRL; -CP.wordBoundary = CP.boundary; -CP.either = CP.alternate; -CP.subGroup = CP.group; -CP.nonCaptureSubGroup = CP.nonCaptureGroup; -CP.lookAheadSubGroup = CP.lookAheadGroup; - -exports['RegExComposer'] = Composer; - - /* main code ends here */ - /* export the module */ - return exports["RegExComposer"]; -}); \ No newline at end of file diff --git a/build/js/regexcomposer.min.js b/build/js/regexcomposer.min.js deleted file mode 100644 index e0ee895..0000000 --- a/build/js/regexcomposer.min.js +++ /dev/null @@ -1,9 +0,0 @@ -/** -* -* RegExComposer -* @version: 0.4 -* -* A simple and intuitive Regular Expression Composer in JavaScript -* https://github.com/foo123/regex-analyzer -* -**/!function(r,t,e){"use strict";var n,a="object"==typeof module&&module.exports,p="function"==typeof define&&define.amd;a?module.exports=(module.$deps=module.$deps||{})[t]=module.$deps[t]||e.call(r,{NODE:module})||1:p&&"function"==typeof require&&"function"==typeof require.specified&&require.specified(t)?define(t,["require","exports","module"],function(t,n,a){return e.call(r,{AMD:a})}):t in r||(r[t]=n=e.call(r,{})||1)&&p&&define(t,[],function(){return n})}(this,"RegExComposer",function(r){var t=void 0,e=Object.prototype,n=Array.prototype,a=(e.toString,function(r){return n.slice.apply(r,n.slice.call(arguments,1))}),p=function(r){return r.replace(/([.*+?^${}()|[\]\/\\\-])/g,"\\$1")},u=function(r){for(var t=[],e=0;e chars.indexOf(c); }); + return choices.length ? choices[rnd(0, choices.length-1)] : ''; + }, - return chars; - }, - - SPACES = " \r\n\t\v".split(""), - PUNCTS = "~!@#$%^&*()-+=[]{}\\|;:,./<>?".split(""), - DIGITS = "0123456789".split(""), - ALPHAS = ["_"].concat(getCharRange("a", "z")).concat(getCharRange("A", "Z")), - ALL = SPACES.concat(PUNCTS).concat(DIGITS).concat(ALPHAS), - punct = function( ){ return PUNCTS[rnd(0, PUNCTS.length-1)]; }, - space = function( positive ){ - return false !== positive - ? SPACES[rnd(0, SPACES.length-1)] - : [punct(), digit(), alpha()][rnd(0,2)] - ; - }, - digit = function( positive ){ - return false !== positive - ? DIGITS[rnd(0, DIGITS.length-1)] - : [punct(), space(), alpha()][rnd(0,2)] - ; - }, - alpha = function( positive ){ - return false !== positive - ? ALPHAS[rnd(0, ALPHAS.length-1)] - : [punct(), space(), digit()][rnd(0,2)] - ; - }, - word = function( positive ){ - return false !== positive - ? ["_"].concat(ALPHAS).concat(DIGITS)[rnd(0, ALPHAS.length+DIGITS.length)] - : [punct(), space()][rnd(0,1)] - ; - }, - character = function( chars, positive ){ - if ( false !== positive ) return chars.length ? chars[rnd(0, chars.length-1)] : ''; - var choices = ALL.filter(function(c){ return 0 > chars.indexOf(c); }); - return choices.length ? choices[rnd(0, choices.length-1)] : ''; - }, - - concat = function(p1, p2) { - if ( p2 && ( p2 instanceof Arr || "[object Array]" == to_string.call(p2) ) ) - { - for (var p=0, l=p2.length; p= l || !p || "Quantifier" != p.type || - ( !p.flags.MatchZeroOrMore && !p.flags.MatchZeroOrOne && "0"!=p.flags.MatchMinimum ) - ); - while ( !done ) + getPeekChars = function(part) { + var peek = {}, negativepeek = {}, current, p, i, l, + tmp, done, type, ptype; + + type = part.type; + // walk the sequence + if ( "Alternation" == type ) + { + for (i=0, l=part.part.length; i= l || !p || "Quantifier" != p.type || ( !p.flags.MatchZeroOrMore && !p.flags.MatchZeroOrOne && "0"!=p.flags.MatchMinimum ) ); - } - if ( i < l ) - { - p = part.part[i]; - - if ("Special" == p.type && ('^'==p.part || '$'==p.part)) p = part.part[i+1] || null; - - if (p && "Quantifier" == p.type) p = p.part; - - if (p) + while ( !done ) { - tmp = getPeekChars( p ); + tmp = getPeekChars( p.part ); peek = concat( peek, tmp.peek ); negativepeek = concat( negativepeek, tmp.negativepeek ); + + i++; + p = part.part[i]; + + done = ( + i >= l || !p || "Quantifier" != p.type || + ( !p.flags.MatchZeroOrMore && !p.flags.MatchZeroOrOne && "0"!=p.flags.MatchMinimum ) + ); + } + if ( i < l ) + { + p = part.part[i]; + + if ("Special" == p.type && ('^'==p.part || '$'==p.part)) p = part.part[i+1] || null; + + if (p && "Quantifier" == p.type) p = p.part; + + if (p) + { + tmp = getPeekChars( p ); + peek = concat( peek, tmp.peek ); + negativepeek = concat( negativepeek, tmp.negativepeek ); + } } } - } - - else if ( "CharGroup" == type ) - { - current = ( part.flags.NotMatch ) ? negativepeek : peek; - for (i=0, l=part.part.length; i 0 ) + { + repeat--; + sample += generate( p.part, isCaseInsensitive ); + } } - while ( repeat > 0 ) + else if ( "Special" == p.type ) { - repeat--; - sample += generate( p.part, isCaseInsensitive ); + if ( p.flags.MatchAnyChar ) sample += any( ); + } + else + { + sample += generate( p, isCaseInsensitive ); } - } - else - { - sample += generate( p, isCaseInsensitive ); } } - } - - else if ( "CharGroup" == type ) - { - var chars = [], ptype; - for (i=0, l=part.part.length; i 1 ) + { + sequence.push( { part: prev.part.slice(0, -1), flags: {}, type: "String" } ); + prev.part = prev.part.slice(-1); + } + sequence.push( { part: prev, flags: flag, type: "Quantifier" } ); } - var prev = sequence.pop(); - if ( "String" == prev.type && prev.part.length > 1 ) + + // quantifiers + else if ( '*' == ch || '+' == ch || '?' == ch ) { - sequence.push( { part: prev.part.slice(0, -1), flags: {}, type: "String" } ); - prev.part = prev.part.slice(-1); + if ( word.length ) + { + sequence.push( { part: word, flags: {}, type: "String" } ); + word = ''; + } + flag = {}; + flag[ specialChars[ch] ] = 1; + if ( '?' == self.regex.charAt(self.pos) ) + { + flag[ "isGreedy" ] = 0; + self.pos++; + } + else + { + flag[ "isGreedy" ] = 1; + } + var prev = sequence.pop(); + if ( "String" == prev.type && prev.part.length > 1 ) + { + sequence.push( { part: prev.part.slice(0, -1), flags: {}, type: "String" } ); + prev.part = prev.part.slice(-1); + } + sequence.push( { part: prev, flags: flag, type: "Quantifier" } ); } - sequence.push( { part: prev, flags: flag, type: "Quantifier" } ); - } - // quantifiers - else if ( '*' == ch || '+' == ch || '?' == ch ) - { - if ( word.length ) + // special characters like ^, $, ., etc.. + else if ( specialChars[ch] ) { - sequence.push( { part: word, flags: {}, type: "String" } ); - word = ''; - } - flag = {}; - flag[ specialChars[ch] ] = 1; - if ( '?' == self.regex.charAt(self.pos) ) - { - flag[ "isGreedy" ] = 0; - self.pos++; + if ( word.length ) + { + sequence.push( { part: word, flags: {}, type: "String" } ); + word = ''; + } + flag = {}; + flag[ specialChars[ch] ] = 1; + sequence.push( { part: ch, flags: flag, type: "Special" } ); } + else { - flag[ "isGreedy" ] = 1; - } - var prev = sequence.pop(); - if ( "String" == prev.type && prev.part.length > 1 ) - { - sequence.push( { part: prev.part.slice(0, -1), flags: {}, type: "String" } ); - prev.part = prev.part.slice(-1); - } - sequence.push( { part: prev, flags: flag, type: "Quantifier" } ); - } - - // special characters like ^, $, ., etc.. - else if ( specialChars[ch] ) - { - if ( word.length ) - { - sequence.push( { part: word, flags: {}, type: "String" } ); - word = ''; + word += ch; } - flag = {}; - flag[ specialChars[ch] ] = 1; - sequence.push( { part: ch, flags: flag, type: "Special" } ); - } - - else - { - word += ch; } } - } - if ( word.length ) - { - sequence.push( { part: word, flags: {}, type: "String" } ); - word = ''; - } - if ( alternation.length ) - { - alternation.push( { part: sequence, flags: {}, type: "Sequence" } ); - sequence = []; - flag = {}; - flag[ specialChars['|'] ] = 1; - return { part: { part: alternation, flags: flag, type: "Alternation" }, flags: flags, type: "Group" }; - } - else - { - return { part: { part: sequence, flags: {}, type: "Sequence" }, flags: flags, type: "Group" }; - } - }, - - chargroup = function( self ) { - var sequence = [], chars = [], flags = {}, flag, ch, prevch, range, isRange = false, match, isUnicode, escaped = false; - - if ( '^' == self.regex.charAt( self.pos ) ) - { - flags[ "NotMatch" ] = 1; - self.pos++; - } - - while ( self.pos < self.regex.length ) - { - isUnicode = false; - prevch = ch; - ch = self.regex.charAt( self.pos++ ); - - escaped = (escapeChar == ch) ? true : false; - if ( escaped ) ch = self.regex.charAt( self.pos++ ); - - if ( escaped ) + if ( word.length ) { - // unicode character - if ( 'u' == ch ) - { - match = unicodeRegex.exec( self.regex.substr( self.pos-1 ) ); - self.pos += match[0].length-1; - ch = String.fromCharCode(parseInt(match[1], 16)); - isUnicode = true; - } - - // hex character - else if ( 'x' == ch ) - { - match = hexRegex.exec( self.regex.substr( self.pos-1 ) ); - self.pos += match[0].length-1; - ch = String.fromCharCode(parseInt(match[1], 16)); - isUnicode = true; - } + sequence.push( { part: word, flags: {}, type: "String" } ); + word = ''; } - - if ( isRange ) + if ( alternation.length ) { - if ( chars.length ) - { - sequence.push( { part: chars, flags: {}, type: "Chars" } ); - chars = []; - } - range[1] = ch; - isRange = false; - sequence.push( { part: range, flags: {}, type: "CharRange" } ); + alternation.push( { part: sequence, flags: {}, type: "Sequence" } ); + sequence = []; + flag = {}; + flag[ specialChars['|'] ] = 1; + return { part: { part: alternation, flags: flag, type: "Alternation" }, flags: flags, type: "Group" }; } else { + return { part: { part: sequence, flags: {}, type: "Sequence" }, flags: flags, type: "Group" }; + } + }, + + chargroup = function( self ) { + var sequence = [], chars = [], flags = {}, flag, ch, prevch, range, isRange = false, match, isUnicode, escaped = false; + + if ( '^' == self.regex.charAt( self.pos ) ) + { + flags[ "NotMatch" ] = 1; + self.pos++; + } + + while ( self.pos < self.regex.length ) + { + isUnicode = false; + prevch = ch; + ch = self.regex.charAt( self.pos++ ); + + escaped = (escapeChar == ch) ? true : false; + if ( escaped ) ch = self.regex.charAt( self.pos++ ); + if ( escaped ) { - if ( !isUnicode && specialCharsEscaped[ch] && '/' != ch) + // unicode character + if ( 'u' == ch ) { - if ( chars.length ) - { - sequence.push( { part: chars, flags: {}, type: "Chars" } ); - chars = []; - } - flag = {}; - flag[ specialCharsEscaped[ch] ] = 1; - sequence.push( { part: ch, flags: flag, type: "Special" } ); + match = unicodeRegex.exec( self.regex.substr( self.pos-1 ) ); + self.pos += match[0].length-1; + ch = String.fromCharCode(parseInt(match[1], 16)); + isUnicode = true; } - else + // hex character + else if ( 'x' == ch ) { - chars.push( ch ); + match = hexRegex.exec( self.regex.substr( self.pos-1 ) ); + self.pos += match[0].length-1; + ch = String.fromCharCode(parseInt(match[1], 16)); + isUnicode = true; } } + if ( isRange ) + { + if ( chars.length ) + { + sequence.push( { part: chars, flags: {}, type: "Chars" } ); + chars = []; + } + range[1] = ch; + isRange = false; + sequence.push( { part: range, flags: {}, type: "CharRange" } ); + } else { - // end of char group - if ( ']' == ch ) + if ( escaped ) { - if ( chars.length ) + if ( !isUnicode && specialCharsEscaped[ch] && '/' != ch) { - sequence.push( { part: chars, flags: {}, type: "Chars" } ); - chars = []; + if ( chars.length ) + { + sequence.push( { part: chars, flags: {}, type: "Chars" } ); + chars = []; + } + flag = {}; + flag[ specialCharsEscaped[ch] ] = 1; + sequence.push( { part: ch, flags: flag, type: "Special" } ); + } + + else + { + chars.push( ch ); } - return { part: sequence, flags: flags, type: "CharGroup" }; - } - - else if ( '-' == ch ) - { - range = [prevch, '']; - chars.pop(); - isRange = true; } else { - chars.push( ch ); + // end of char group + if ( ']' == ch ) + { + if ( chars.length ) + { + sequence.push( { part: chars, flags: {}, type: "Chars" } ); + chars = []; + } + return { part: sequence, flags: flags, type: "CharGroup" }; + } + + else if ( '-' == ch ) + { + range = [prevch, '']; + chars.pop(); + isRange = true; + } + + else + { + chars.push( ch ); + } } } } - } - if ( chars.length ) - { - sequence.push( { part: chars, flags: {}, type: "Chars" } ); - chars = []; - } - return { part: sequence, flags: flags, type: "CharGroup" }; - }, - - analyze = function( regex ) { - var self = {pos: 0, groupIndex: 0, regex: regex}; - var ch, word = '', alternation = [], sequence = [], flag, match, escaped = false; + if ( chars.length ) + { + sequence.push( { part: chars, flags: {}, type: "Chars" } ); + chars = []; + } + return { part: sequence, flags: flags, type: "CharGroup" }; + }, - while ( self.pos < self.regex.length ) - { - ch = self.regex.charAt( self.pos++ ); - - // \\abc - escaped = (escapeChar == ch) ? true : false; - if ( escaped ) ch = self.regex.charAt( self.pos++ ); + analyze = function( regex ) { + var self = {pos: 0, groupIndex: 0, regex: regex}; + var ch, word = '', alternation = [], sequence = [], flag, match, escaped = false; - if ( escaped ) + while ( self.pos < self.regex.length ) { - // unicode character - if ( 'u' == ch ) - { - if ( word.length ) - { - sequence.push( { part: word, flags: {}, type: "String" } ); - word = ''; - } - match = unicodeRegex.exec( self.regex.substr( self.pos-1 ) ); - self.pos += match[0].length-1; - sequence.push( { part: match[0], flags: { "Char": String.fromCharCode(parseInt(match[1], 16)), "Code": match[1] }, type: "UnicodeChar" } ); - } + ch = self.regex.charAt( self.pos++ ); - // hex character - else if ( 'x' == ch ) - { - if ( word.length ) - { - sequence.push( { part: word, flags: {}, type: "String" } ); - word = ''; - } - match = hexRegex.exec( self.regex.substr( self.pos-1 ) ); - self.pos += match[0].length-1; - sequence.push( { part: match[0], flags: { "Char": String.fromCharCode(parseInt(match[1], 16)), "Code": match[1] }, type: "HexChar" } ); - } + // \\abc + escaped = (escapeChar == ch) ? true : false; + if ( escaped ) ch = self.regex.charAt( self.pos++ ); - else if ( specialCharsEscaped[ch] && '/' != ch) + if ( escaped ) { - if ( word.length ) + // unicode character + if ( 'u' == ch ) { - sequence.push( { part: word, flags: {}, type: "String" } ); - word = ''; + if ( word.length ) + { + sequence.push( { part: word, flags: {}, type: "String" } ); + word = ''; + } + match = unicodeRegex.exec( self.regex.substr( self.pos-1 ) ); + self.pos += match[0].length-1; + sequence.push( { part: match[0], flags: { "Char": String.fromCharCode(parseInt(match[1], 16)), "Code": match[1] }, type: "UnicodeChar" } ); } - flag = {}; - flag[ specialCharsEscaped[ch] ] = 1; - sequence.push( { part: ch, flags: flag, type: "Special" } ); - } - - else - { - word += ch; - } - } - - else - { - // parse alternation - if ( '|' == ch ) - { - if ( word.length ) + + // hex character + else if ( 'x' == ch ) { - sequence.push( { part: word, flags: {}, type: "String" } ); - word = ''; + if ( word.length ) + { + sequence.push( { part: word, flags: {}, type: "String" } ); + word = ''; + } + match = hexRegex.exec( self.regex.substr( self.pos-1 ) ); + self.pos += match[0].length-1; + sequence.push( { part: match[0], flags: { "Char": String.fromCharCode(parseInt(match[1], 16)), "Code": match[1] }, type: "HexChar" } ); } - alternation.push( { part: sequence, flags: {}, type: "Sequence" } ); - sequence = []; - } - - // parse character group - else if ( '[' == ch ) - { - if ( word.length ) + + else if ( specialCharsEscaped[ch] && '/' != ch) { - sequence.push( { part: word, flags: {}, type: "String" } ); - word = ''; + if ( word.length ) + { + sequence.push( { part: word, flags: {}, type: "String" } ); + word = ''; + } + flag = {}; + flag[ specialCharsEscaped[ch] ] = 1; + sequence.push( { part: ch, flags: flag, type: "Special" } ); } - sequence.push( chargroup( self ) ); - } - - // parse sub-group - else if ( '(' == ch ) - { - if ( word.length ) + + else { - sequence.push( { part: word, flags: {}, type: "String" } ); - word = ''; + word += ch; } - sequence.push( subgroup( self ) ); } - // parse num repeats - else if ( '{' == ch ) + else { - if ( word.length ) + // parse alternation + if ( '|' == ch ) { - sequence.push( { part: word, flags: {}, type: "String" } ); - word = ''; + if ( word.length ) + { + sequence.push( { part: word, flags: {}, type: "String" } ); + word = ''; + } + alternation.push( { part: sequence, flags: {}, type: "Sequence" } ); + sequence = []; } - match = repeatsRegex.exec( self.regex.substr( self.pos-1 ) ); - self.pos += match[0].length-1; - flag = { part: match[0], "MatchMinimum": match[1], "MatchMaximum": match[2] || "unlimited" }; - flag[ specialChars[ch] ] = 1; - if ( '?' == self.regex.charAt(self.pos) ) + + // parse character group + else if ( '[' == ch ) { - flag[ "isGreedy" ] = 0; - self.pos++; + if ( word.length ) + { + sequence.push( { part: word, flags: {}, type: "String" } ); + word = ''; + } + sequence.push( chargroup( self ) ); } - else + + // parse sub-group + else if ( '(' == ch ) { - flag[ "isGreedy" ] = 1; + if ( word.length ) + { + sequence.push( { part: word, flags: {}, type: "String" } ); + word = ''; + } + sequence.push( subgroup( self ) ); } - var prev = sequence.pop(); - if ( "String" == prev.type && prev.part.length > 1 ) + + // parse num repeats + else if ( '{' == ch ) { - sequence.push( { part: prev.part.slice(0, -1), flags: {}, type: "String" } ); - prev.part = prev.part.slice(-1); + if ( word.length ) + { + sequence.push( { part: word, flags: {}, type: "String" } ); + word = ''; + } + match = repeatsRegex.exec( self.regex.substr( self.pos-1 ) ); + self.pos += match[0].length-1; + flag = { part: match[0], "MatchMinimum": match[1], "MatchMaximum": match[2] || "unlimited" }; + flag[ specialChars[ch] ] = 1; + if ( '?' == self.regex.charAt(self.pos) ) + { + flag[ "isGreedy" ] = 0; + self.pos++; + } + else + { + flag[ "isGreedy" ] = 1; + } + var prev = sequence.pop(); + if ( "String" == prev.type && prev.part.length > 1 ) + { + sequence.push( { part: prev.part.slice(0, -1), flags: {}, type: "String" } ); + prev.part = prev.part.slice(-1); + } + sequence.push( { part: prev, flags: flag, type: "Quantifier" } ); } - sequence.push( { part: prev, flags: flag, type: "Quantifier" } ); - } - - // quantifiers - else if ( '*' == ch || '+' == ch || '?' == ch ) - { - if ( word.length ) + + // quantifiers + else if ( '*' == ch || '+' == ch || '?' == ch ) { - sequence.push( { part: word, flags: {}, type: "String" } ); - word = ''; + if ( word.length ) + { + sequence.push( { part: word, flags: {}, type: "String" } ); + word = ''; + } + flag = {}; + flag[ specialChars[ch] ] = 1; + if ( '?' == self.regex.charAt(self.pos) ) + { + flag[ "isGreedy" ] = 0; + self.pos++; + } + else + { + flag[ "isGreedy" ] = 1; + } + var prev = sequence.pop(); + if ( "String" == prev.type && prev.part.length > 1 ) + { + sequence.push( { part: prev.part.slice(0, -1), flags: {}, type: "String" } ); + prev.part = prev.part.slice(-1); + } + sequence.push( { part: prev, flags: flag, type: "Quantifier" } ); } - flag = {}; - flag[ specialChars[ch] ] = 1; - if ( '?' == self.regex.charAt(self.pos) ) + + // special characters like ^, $, ., etc.. + else if ( specialChars[ch] ) { - flag[ "isGreedy" ] = 0; - self.pos++; + if ( word.length ) + { + sequence.push( { part: word, flags: {}, type: "String" } ); + word = ''; + } + flag = {}; + flag[ specialChars[ch] ] = 1; + sequence.push( { part: ch, flags: flag, type: "Special" } ); } + else { - flag[ "isGreedy" ] = 1; - } - var prev = sequence.pop(); - if ( "String" == prev.type && prev.part.length > 1 ) - { - sequence.push( { part: prev.part.slice(0, -1), flags: {}, type: "String" } ); - prev.part = prev.part.slice(-1); - } - sequence.push( { part: prev, flags: flag, type: "Quantifier" } ); - } - - // special characters like ^, $, ., etc.. - else if ( specialChars[ch] ) - { - if ( word.length ) - { - sequence.push( { part: word, flags: {}, type: "String" } ); - word = ''; + word += ch; } - flag = {}; - flag[ specialChars[ch] ] = 1; - sequence.push( { part: ch, flags: flag, type: "Special" } ); - } - - else - { - word += ch; } } - } - - if ( word.length ) - { - sequence.push( { part: word, flags: {}, type: "String" } ); - word = ''; - } - - if ( alternation.length ) - { - alternation.push( { part: sequence, flags: {}, type: "Sequence" } ); - sequence = []; - flag = {}; - flag[ specialChars['|'] ] = 1; - parts = { part: alternation, flags: flag, type: "Alternation" }; - } - else - { - parts = { part: sequence, flags: {}, type: "Sequence" }; - } - return parts; - } -; - -// A simple (js-flavored) regular expression analyzer -var Analyzer = function( regex, delim ) { - if ( regex ) this.regex( regex, delim ); -}; -Analyzer.VERSION = "@@VERSION@@"; -Analyzer.getCharRange = getCharRange; -Analyzer.prototype = { - - constructor: Analyzer, - - $regex: null, - $flags: null, - $parts: null, - $needsRefresh: true, - - dispose: function( ) { - var self = this; - self.$regex = null; - self.$flags = null; - self.$parts = null; - return self; - }, - - regex: function( regex, delim ) { - var self = this; - if ( regex ) - { - delim = delim || '/'; - var flags = {}, r = regex.toString( ), l = r.length, ch = r.charAt(l-1); - // parse regex flags - while ( delim !== ch ) + if ( word.length ) { - flags[ ch ] = 1; - r = r.substr(0, l-1); - l = r.length; - ch = r.charAt(l-1); + sequence.push( { part: word, flags: {}, type: "String" } ); + word = ''; } - // remove regex delimiters - if ( delim == r.charAt(0) && delim == r.charAt(l-1) ) r = r.substr(1, l-2); - if ( self.$regex !== r ) self.$needsRefresh = true; - self.$regex = r; self.$flags = flags; - } - return self; - }, - - analyze: function( ) { - var self = this; - if ( self.$needsRefresh ) - { - self.$parts = analyze( self.$regex ); - self.$needsRefresh = false; + if ( alternation.length ) + { + alternation.push( { part: sequence, flags: {}, type: "Sequence" } ); + sequence = []; + flag = {}; + flag[ specialChars['|'] ] = 1; + return { part: alternation, flags: flag, type: "Alternation" }; + } + else + { + return { part: sequence, flags: {}, type: "Sequence" }; + } } - return self; - }, - - getParts: function( ) { - var self = this; - if ( self.$needsRefresh ) self.analyze( ); - return self.$parts; - }, - - // experimental feature - generateSample: function( ) { - var self = this; - if ( self.$needsRefresh ) self.analyze( ); - return generate( self.$parts, self.$flags && self.$flags.i ); - }, - - // experimental feature - getPeekChars: function( ) { - var self = this, isCaseInsensitive, - peek, n, c, p, cases; - - if ( self.$needsRefresh ) self.analyze( ); + ; + + // A simple (js-flavored) regular expression analyzer + var Analyzer = function( regex, delim ) { + if ( regex ) this.regex( regex, delim ); + }; + Analyzer.VERSION = __version__; + Analyzer.getCharRange = getCharRange; + Analyzer.prototype = { - peek = getPeekChars( self.$parts ); - isCaseInsensitive = self.$flags && self.$flags.i; + constructor: Analyzer, + + $regex: null, + $flags: null, + $parts: null, + $needsRefresh: true, + + dispose: function( ) { + var self = this; + self.$regex = null; + self.$flags = null; + self.$parts = null; + return self; + }, - for (n in peek) - { - cases = {}; - // either peek or negativepeek - p = peek[n]; - for (c in p) + regex: function( regex, delim ) { + var self = this; + if ( regex ) { - if ('\\d' == c) - { - delete p[c]; - cases = concat(cases, getCharRange('0', '9')); - } - - else if ('\\s' == c) - { - delete p[c]; - cases = concat(cases, ['\f','\n','\r','\t','\v','\u00A0','\u2028','\u2029']); - } + delim = delim || '/'; + var flags = {}, r = regex.toString( ), l = r.length, ch = r.charAt(l-1); - else if ('\\w' == c) + // parse regex flags + while ( delim !== ch ) { - delete p[c]; - cases = concat(cases, ['_'].concat(getCharRange('0', '9')).concat(getCharRange('a', 'z')).concat(getCharRange('A', 'Z'))); + flags[ ch ] = 1; + r = r.substr(0, l-1); + l = r.length; + ch = r.charAt(l-1); } + // remove regex delimiters + if ( delim == r.charAt(0) && delim == r.charAt(l-1) ) r = r.substr(1, l-2); - else if ('\\.' == c) - { - delete p[c]; - cases[ specialChars['.'] ] = 1; - } - - /*else if ('\\^' == c) - { - delete p[c]; - cases[ specialChars['^'] ] = 1; - } - - else if ('\\$' == c) - { - delete p[c]; - cases[ specialChars['$'] ] = 1; - }*/ - - else if ( '\\' != c.charAt(0) && isCaseInsensitive ) - { - cases[ c.toLowerCase() ] = 1; - cases[ c.toUpperCase() ] = 1; - } - - else if ( '\\' == c.charAt(0) ) + if ( self.$regex !== r ) self.$needsRefresh = true; + self.$regex = r; self.$flags = flags; + } + return self; + }, + + analyze: function( ) { + var self = this; + if ( self.$needsRefresh ) + { + self.$parts = analyze( self.$regex ); + self.$needsRefresh = false; + } + return self; + }, + + getParts: function( ) { + var self = this; + if ( self.$needsRefresh ) self.analyze( ); + return self.$parts; + }, + + // experimental feature + generateSample: function( ) { + var self = this; + if ( self.$needsRefresh ) self.analyze( ); + return generate( self.$parts, self.$flags && self.$flags.i ); + }, + + // experimental feature + getPeekChars: function( ) { + var self = this, isCaseInsensitive, + peek, n, c, p, cases; + + if ( self.$needsRefresh ) self.analyze( ); + + peek = getPeekChars( self.$parts ); + isCaseInsensitive = self.$flags && self.$flags.i; + + for (n in peek) + { + cases = {}; + // either peek or negativepeek + p = peek[n]; + for (c in p) { - delete p[c]; + if ('\\d' == c) + { + delete p[c]; + cases = concat(cases, getCharRange('0', '9')); + } + + else if ('\\s' == c) + { + delete p[c]; + cases = concat(cases, ['\f','\n','\r','\t','\v','\u00A0','\u2028','\u2029']); + } + + else if ('\\w' == c) + { + delete p[c]; + cases = concat(cases, ['_'].concat(getCharRange('0', '9')).concat(getCharRange('a', 'z')).concat(getCharRange('A', 'Z'))); + } + + else if ('\\.' == c) + { + delete p[c]; + cases[ specialChars['.'] ] = 1; + } + + /*else if ('\\^' == c) + { + delete p[c]; + cases[ specialChars['^'] ] = 1; + } + + else if ('\\$' == c) + { + delete p[c]; + cases[ specialChars['$'] ] = 1; + }*/ + + else if ( '\\' != c.charAt(0) && isCaseInsensitive ) + { + cases[ c.toLowerCase() ] = 1; + cases[ c.toUpperCase() ] = 1; + } + + else if ( '\\' == c.charAt(0) ) + { + delete p[c]; + } } + peek[n] = concat(p, cases); } - peek[n] = concat(p, cases); + return peek; } - return peek; - } -}; - + }; -exports['@@MODULE_NAME@@'] = Analyzer; + /* main code ends here */ + /* export the module */ + return Analyzer; +}); \ No newline at end of file diff --git a/src/js/regexanalyzer.min.js b/src/js/regexanalyzer.min.js new file mode 100644 index 0000000..bf30b2b --- /dev/null +++ b/src/js/regexanalyzer.min.js @@ -0,0 +1,10 @@ +/** +* +* RegExAnalyzer +* @version: 0.4.1 +* +* A simple Regular Expression Analyzer for PHP, Python, Node/JS +* https://github.com/foo123/regex-analyzer +* +**/ +!function(e,t,r){"use strict";var a,p="object"==typeof module&&module.exports,s="function"==typeof define&&define.amd;p?module.exports=(module.$deps=module.$deps||{})[t]=module.$deps[t]||r.call(e,{NODE:module})||1:s&&"function"==typeof require&&"function"==typeof require.specified&&require.specified(t)?define(t,["require","exports","module"],function(t,a,p){return r.call(e,{AMD:p})}):t in e||(e[t]=a=r.call(e,{})||1)&&s&&define(t,[],function(){return a})}(this,"RegExAnalyzer",function(){"use strict";var e="0.4.1",t=Object,r=Array,a=t.prototype.toString,p="\\",s=/^\{\s*(\d+)\s*,?\s*(\d+)?\s*\}/,n=/^u([0-9a-fA-F]{4})/,h=/^x([0-9a-fA-F]{2})/,l={".":"MatchAnyChar","|":"MatchEither","?":"MatchZeroOrOne","*":"MatchZeroOrMore","+":"MatchOneOrMore","^":"MatchStart",$:"MatchEnd","{":"StartRepeats","}":"EndRepeats","(":"StartGroup",")":"EndGroup","[":"StartCharGroup","]":"EndCharGroup"},g={"\\":"EscapeChar","/":"/",0:"NULChar",f:"FormFeed",n:"LineFeed",r:"CarriageReturn",t:"HorizontalTab",v:"VerticalTab",b:"MatchWordBoundary",B:"MatchNonWordBoundary",s:"MatchSpaceChar",S:"MatchNonSpaceChar",w:"MatchWordChar",W:"MatchNonWordChar",d:"MatchDigitChar",D:"MatchNonDigitChar"},o=function(e,t){return Math.round((t-e)*Math.random()+e)},i=function(e,t){e&&(e instanceof r||"[object Array]"==a.call(e))&&(t=e[1],e=e[0]);var p,s,n=e.charCodeAt(0),h=t.charCodeAt(0);if(h==n)return[String.fromCharCode(n)];for(s=[],p=n;h>=p;++p)s.push(String.fromCharCode(p));return s},f=("\r\n".split("")," ".split("")),u="~!@#$%^&*()-+=[]{}\\|;:,./<>?".split(""),c="0123456789".split(""),y=["_"].concat(i("a","z")).concat(i("A","Z")),d=f.concat(u).concat(c).concat(y),C=function(){return u[o(0,u.length-1)]},S=function(e){return!1!==e?f[o(0,f.length-1)]:[C(),x(),M()][o(0,2)]},x=function(e){return!1!==e?c[o(0,c.length-1)]:[C(),S(),M()][o(0,2)]},M=function(e){return!1!==e?y[o(0,y.length-1)]:[C(),S(),x()][o(0,2)]},m=function(e){return!1!==e?["_"].concat(y).concat(c)[o(0,y.length+c.length)]:[C(),S()][o(0,1)]},A=function(){return d[o(0,d.length-1)]},v=function(e,t){if(!1!==t)return e.length?e[o(0,e.length-1)]:"";var r=d.filter(function(t){return 0>e.indexOf(t)});return r.length?r[o(0,r.length-1)]:""},$=function(e,t){if(t&&(t instanceof r||"[object Array]"==a.call(t)))for(var p=0,s=t.length;s>p;p++)e[t[p]]=1;else for(var p in t)e[p]=1;return e},G=function(e,t){return t?(e.charAt&&(e=e.split("")),e=e.map(function(e){return o(0,1)?e.toLowerCase():e.toUpperCase()}),t||(e=e.join("")),e):o(0,1)?e.toLowerCase():e.toUpperCase()},O=function(e){var t,r,a,p,s,n,h,l,g={},o={};if(h=e.type,"Alternation"==h)for(a=0,p=e.part.length;p>a;a++)s=O(e.part[a]),g=$(g,s.peek),o=$(o,s.negativepeek);else if("Group"==h)s=O(e.part),g=$(g,s.peek),o=$(o,s.negativepeek);else if("Sequence"==h){for(a=0,p=e.part.length,r=e.part[a],n=a>=p||!r||"Quantifier"!=r.type||!r.flags.MatchZeroOrMore&&!r.flags.MatchZeroOrOne&&"0"!=r.flags.MatchMinimum;!n;)s=O(r.part),g=$(g,s.peek),o=$(o,s.negativepeek),a++,r=e.part[a],n=a>=p||!r||"Quantifier"!=r.type||!r.flags.MatchZeroOrMore&&!r.flags.MatchZeroOrOne&&"0"!=r.flags.MatchMinimum;p>a&&(r=e.part[a],"Special"!=r.type||"^"!=r.part&&"$"!=r.part||(r=e.part[a+1]||null),r&&"Quantifier"==r.type&&(r=r.part),r&&(s=O(r),g=$(g,s.peek),o=$(o,s.negativepeek)))}else if("CharGroup"==h)for(t=e.flags.NotMatch?o:g,a=0,p=e.part.length;p>a;a++)r=e.part[a],l=r.type,"Chars"==l?t=$(t,r.part):"CharRange"==l?t=$(t,i(r.part)):"UnicodeChar"==l||"HexChar"==l?t[r.flags.Char]=1:"Special"==l&&("D"==r.part?e.flags.NotMatch?g["\\d"]=1:o["\\d"]=1:"W"==r.part?e.flags.NotMatch?g["\\w"]=1:o["\\W"]=1:"S"==r.part?e.flags.NotMatch?g["\\s"]=1:o["\\s"]=1:t["\\"+r.part]=1);else"String"==h?g[e.part.charAt(0)]=1:"Special"!=h||e.flags.MatchStart||e.flags.MatchEnd?("UnicodeChar"==h||"HexChar"==h)&&(g[e.flags.Char]=1):"D"==e.part?o["\\d"]=1:"W"==e.part?o["\\W"]=1:"S"==e.part?o["\\s"]=1:g["\\"+e.part]=1;return{peek:g,negativepeek:o}},b=function(e,t){var r,a,p,s,n="";if(s=e.type,"Alternation"==s)n+=b(e.part[o(0,e.part.length-1)],t);else if("Group"==s)n+=b(e.part,t);else if("Sequence"==s){var h,l,g;for(p=e.part.length,r=e.part[a],a=0;p>a;a++)if(r=e.part[a])if(h=1,"Quantifier"==r.type)for(r.flags.MatchZeroOrMore?h=o(0,10):r.flags.MatchZeroOrOne?h=o(0,1):r.flags.MatchOneOrMore?h=o(1,11):(l=parseInt(r.flags.MatchMinimum,10),g=parseInt(r.flags.MatchMaximum,10),h=o(l,isNaN(g)?l+10:g));h>0;)h--,n+=b(r.part,t);else"Special"==r.type?r.flags.MatchAnyChar&&(n+=A()):n+=b(r,t)}else if("CharGroup"==s){var f,u=[];for(a=0,p=e.part.length;p>a;a++)r=e.part[a],f=r.type,"Chars"==f?u=u.concat(t?G(r.part,!0):r.part):"CharRange"==f?u=u.concat(t?G(i(r.part),!0):i(r.part)):"UnicodeChar"==f||"HexChar"==f?u.push(t?G(r.flags.Char):r.flags.Char):"Special"==f&&u.push("D"==r.part?x(!1):"W"==r.part?m(!1):"S"==r.part?S(!1):"d"==r.part?x():"w"==r.part?m():"s"==r.part?S():"\\"+r.part);n+=v(u,!e.flags.NotMatch)}else"String"==s?n+=t?G(e.part):e.part:"Special"!=s||e.flags.MatchStart||e.flags.MatchEnd?("UnicodeChar"==s||"HexChar"==s)&&(n+=t?G(e.flags.Char):e.flags.Char):n+="D"==e.part?x(!1):"W"==e.part?m(!1):"S"==e.part?S(!1):"d"==e.part?x():"w"==e.part?m():"s"==e.part?S():"."==e.part?A():"\\"+e.part;return n},N=function(e){var t,r,a,o="",i=[],f=[],u={},c=!1,y=e.regex.substr(e.pos,2);for("?:"==y?(u.NotCaptured=1,e.pos+=2):"?="==y?(u.LookAhead=1,e.pos+=2):"?!"==y&&(u.NegativeLookAhead=1,e.pos+=2),u.GroupIndex=++e.groupIndex;e.pos1&&(f.push({part:d.part.slice(0,-1),flags:{},type:"String"}),d.part=d.part.slice(-1)),f.push({part:d,flags:r,type:"Quantifier"})}else if("*"==t||"+"==t||"?"==t){o.length&&(f.push({part:o,flags:{},type:"String"}),o=""),r={},r[l[t]]=1,"?"==e.regex.charAt(e.pos)?(r.isGreedy=0,e.pos++):r.isGreedy=1;var d=f.pop();"String"==d.type&&d.part.length>1&&(f.push({part:d.part.slice(0,-1),flags:{},type:"String"}),d.part=d.part.slice(-1)),f.push({part:d,flags:r,type:"Quantifier"})}else l[t]?(o.length&&(f.push({part:o,flags:{},type:"String"}),o=""),r={},r[l[t]]=1,f.push({part:t,flags:r,type:"Special"})):o+=t}return o.length&&(f.push({part:o,flags:{},type:"String"}),o=""),i.length?(i.push({part:f,flags:{},type:"Sequence"}),f=[],r={},r[l["|"]]=1,{part:{part:i,flags:r,type:"Alternation"},flags:u,type:"Group"}):{part:{part:f,flags:{},type:"Sequence"},flags:u,type:"Group"}},R=function(e){var t,r,a,s,l,o,i=[],f=[],u={},c=!1,y=!1;for("^"==e.regex.charAt(e.pos)&&(u.NotMatch=1,e.pos++);e.pos1&&(u.push({part:y.part.slice(0,-1),flags:{},type:"String"}),y.part=y.part.slice(-1)),u.push({part:y,flags:r,type:"Quantifier"})}else if("*"==t||"+"==t||"?"==t){i.length&&(u.push({part:i,flags:{},type:"String"}),i=""),r={},r[l[t]]=1,"?"==o.regex.charAt(o.pos)?(r.isGreedy=0,o.pos++):r.isGreedy=1;var y=u.pop();"String"==y.type&&y.part.length>1&&(u.push({part:y.part.slice(0,-1),flags:{},type:"String"}),y.part=y.part.slice(-1)),u.push({part:y,flags:r,type:"Quantifier"})}else l[t]?(i.length&&(u.push({part:i,flags:{},type:"String"}),i=""),r={},r[l[t]]=1,u.push({part:t,flags:r,type:"Special"})):i+=t;return i.length&&(u.push({part:i,flags:{},type:"String"}),i=""),f.length?(f.push({part:u,flags:{},type:"Sequence"}),u=[],r={},r[l["|"]]=1,{part:f,flags:r,type:"Alternation"}):{part:u,flags:{},type:"Sequence"}},k=function(e,t){e&&this.regex(e,t)};return k.VERSION=e,k.getCharRange=i,k.prototype={constructor:k,$regex:null,$flags:null,$parts:null,$needsRefresh:!0,dispose:function(){var e=this;return e.$regex=null,e.$flags=null,e.$parts=null,e},regex:function(e,t){var r=this;if(e){t=t||"/";for(var a={},p=e.toString(),s=p.length,n=p.charAt(s-1);t!==n;)a[n]=1,p=p.substr(0,s-1),s=p.length,n=p.charAt(s-1);t==p.charAt(0)&&t==p.charAt(s-1)&&(p=p.substr(1,s-2)),r.$regex!==p&&(r.$needsRefresh=!0),r.$regex=p,r.$flags=a}return r},analyze:function(){var e=this;return e.$needsRefresh&&(e.$parts=q(e.$regex),e.$needsRefresh=!1),e},getParts:function(){var e=this;return e.$needsRefresh&&e.analyze(),e.$parts},generateSample:function(){var e=this;return e.$needsRefresh&&e.analyze(),b(e.$parts,e.$flags&&e.$flags.i)},getPeekChars:function(){var e,t,r,a,p,s,n=this;n.$needsRefresh&&n.analyze(),t=O(n.$parts),e=n.$flags&&n.$flags.i;for(r in t){s={},p=t[r];for(a in p)"\\d"==a?(delete p[a],s=$(s,i("0","9"))):"\\s"==a?(delete p[a],s=$(s,["\f","\n","\r"," "," "," ","\u2028","\u2029"])):"\\w"==a?(delete p[a],s=$(s,["_"].concat(i("0","9")).concat(i("a","z")).concat(i("A","Z")))):"\\."==a?(delete p[a],s[l["."]]=1):"\\"!=a.charAt(0)&&e?(s[a.toLowerCase()]=1,s[a.toUpperCase()]=1):"\\"==a.charAt(0)&&delete p[a];t[r]=$(p,s)}return t}},k}); \ No newline at end of file diff --git a/src/js/regexcomposer.js b/src/js/regexcomposer.js index 4461551..18e1f66 100644 --- a/src/js/regexcomposer.js +++ b/src/js/regexcomposer.js @@ -1,292 +1,328 @@ /** * * RegExComposer -* @version: @@VERSION@@ +* @version: 0.4.1 * -* A simple and intuitive Regular Expression Composer in JavaScript +* A simple and intuitive Regular Expression Composer for PHP, Python, Node/JS * https://github.com/foo123/regex-analyzer * **/ -var undef = undefined, OP = Object.prototype, AP = Array.prototype, - to_string = OP.toString, +!function( root, name, factory ) { + "use strict"; - slice = function( a ) { return AP.slice.apply(a, AP.slice.call(arguments, 1)); }, - - esc = function( s ) { return s.replace(/([.*+?^${}()|[\]\/\\\-])/g, '\\$1'); }, + // + // export the module, umd-style (no other dependencies) + var isCommonJS = ("object" === typeof(module)) && module.exports, + isAMD = ("function" === typeof(define)) && define.amd, m; - flatten = function( a ) { - var r = [], i = 0; - while (i < a.length) r = r.concat(a[i++]); - return r; - }, + // CommonJS, node, etc.. + if ( isCommonJS ) + module.exports = (module.$deps = module.$deps || {})[ name ] = module.$deps[ name ] || (factory.call( root, {NODE:module} ) || 1); - getArgs = function( args, asArray ) { - /*var a = slice(args); - if ( asArray && a[0] && - ( a[0] instanceof Array || '[object Array]' == to_string.call(a[0]) ) - ) - a = a[0];*/ - return flatten( slice( args ) ); //a; - }, + // AMD, requireJS, etc.. + else if ( isAMD && ("function" === typeof(require)) && ("function" === typeof(require.specified)) && require.specified(name) ) + define( name, ['require', 'exports', 'module'], function( require, exports, module ){ return factory.call( root, {AMD:module} ); } ); - T_SEQ = 2, T_EITHER = 4, T_GROUP = 8, T_CHARGROUP = 16 -; + // browser, web worker, etc.. + AMD, other loaders + else if ( !(name in root) ) + (root[ name ] = (m=factory.call( root, {} ) || 1)) && isAMD && define( name, [], function( ){ return m; } ); -// A simple (js-flavored) regular expression composer -var Composer = function( ) { - this.$regex = null; - this.reset( ); -}; -Composer.VERSION = "@@VERSION@@"; -Composer.prototype = { - - constructor: Composer, +}( /* current root */ this, + /* module name */ "RegExComposer", + /* module factory */ function( exports, undef ) { + + "use strict"; + /* main code starts here */ + var __version__ = "0.4.1", OP = Object.prototype, AP = Array.prototype, + to_string = OP.toString, + + slice = function( a ) { return AP.slice.apply(a, AP.slice.call(arguments, 1)); }, - $level: 0, - $regex: null, - $parts: null, + esc = function( s ) { return s.replace(/([.*+?^${}()|[\]\/\\\-])/g, '\\$1'); }, + + flatten = function( a ) { + var r = [], i = 0; + while (i < a.length) r = r.concat(a[i++]); + return r; + }, + + getArgs = function( args, asArray ) { + /*var a = slice(args); + if ( asArray && a[0] && + ( a[0] instanceof Array || '[object Array]' == to_string.call(a[0]) ) + ) + a = a[0];*/ + return flatten( slice( args ) ); //a; + }, + + T_SEQ = 2, T_EITHER = 4, T_GROUP = 8, T_CHARGROUP = 16 + ; - dispose: function( ) { - var self = this; - self.$level = null; - self.$regex = null; - self.$parts = null; - return self; - }, - - reset: function( ) { - var self = this; - self.$level = 0; - self.$parts = [{part: [], type: T_SEQ, flag: ''}]; - return self; - }, - compose: function( /* flags */ ) { - var self = this; - self.$regex = new RegExp(self.$parts[0].part.join(''), slice(arguments).join('')); - self.reset( ); - return self.$regex; - }, + // A simple (js-flavored) regular expression composer + var Composer = function( ) { + this.$regex = null; + this.reset( ); + }; + Composer.VERSION = __version__; + Composer.prototype = { + + constructor: Composer, - partial: function( reset ) { - var self = this, p = self.$parts[0].part.join(''); - if ( false!==reset ) self.reset( ); - return p; - }, + $level: 0, + $regex: null, + $parts: null, - repeat: function( min, max, greedy ) { - var self = this; - if ( undef === min ) return self; - var repeat = ( undef === max ) ? ('{'+min+'}') : ('{'+min+','+max+'}'); - - self.$parts[self.$level].part[self.$parts[self.$level].part.length-1] += (false===greedy) ? (repeat+'?') : repeat; - return self; - }, - - zeroOrOne: function( greedy ) { - var self = this; - self.$parts[self.$level].part[self.$parts[self.$level].part.length-1] += (false===greedy) ? '??' : '?'; - return self; - }, - - zeroOrMore: function( greedy ) { - var self = this; - self.$parts[self.$level].part[self.$parts[self.$level].part.length-1] += (false===greedy) ? '*?' : '*'; - return self; - }, - - oneOrMore: function( greedy ) { - var self = this; - self.$parts[self.$level].part[self.$parts[self.$level].part.length-1] += (false===greedy) ? '+?' : '+'; - return self; - }, - - sub: function( partialRegex, withParen ) { - var self = this; - if ( undef !== partialRegex ) - { - if ( withParen ) partialRegex = '(' + partialRegex + ')'; - self.$parts[self.$level].part.push( partialRegex ); - } - return self; - }, - - match: function( literalStr ) { - var self = this; - if ( undef !== literalStr ) - self.$parts[self.$level].part.push( esc(literalStr) ); - return self; - }, - - startOfInput: function( ) { - var self = this; - self.$parts[self.$level].part.push('^'); - return self; - }, - - endOfInput: function( ) { - var self = this; - self.$parts[self.$level].part.push('$'); - return self; - }, - - any: function( ) { - var self = this; - self.$parts[self.$level].part.push('.'); - return self; - }, - - space: function( positive ) { - var self = this; - self.$parts[self.$level].part.push((false===positive) ? '\\S' : '\\s'); - return self; - }, - - digit: function( positive ) { - var self = this; - self.$parts[self.$level].part.push((false===positive) ? '\\D' : '\\d'); - return self; - }, - - word: function( positive ) { - var self = this; - self.$parts[self.$level].part.push((false===positive) ? '\\W' : '\\w'); - return self; - }, - - boundary: function( positive ) { - var self = this; - self.$parts[self.$level].part.push((false===positive) ? '\\B' : '\\b'); - return self; - }, - - LF: function( ) { - var self = this; - self.$parts[self.$level].part.push('\\n'); - return self; - }, - - CR: function( ) { - var self = this; - self.$parts[self.$level].part.push('\\r'); - return self; - }, - - TAB: function( ) { - var self = this; - self.$parts[self.$level].part.push('\\t'); - return self; - }, - - CTRL: function( _char ) { - var self = this; - if ( _char ) self.$parts[self.$level].part.push('\\c'+_char); - return self; - }, - - backSpace: function( ) { - var self = this; - self.$parts[self.$level].part.push('[\\b]'); - return self; - }, - - backReference: function( n ) { - var self = this; - self.$parts[self.$level].part.push('\\'+parseInt(n, 10)); - return self; - }, - - characters: function( ) { - var self = this; - if ( T_CHARGROUP == self.$parts[self.$level].type ) - { - var chars = getArgs(arguments, 1).map(esc).join(''); - self.$parts[self.$level].part.push( chars ); - } - return self; - }, - - range: function( start, end ) { - var self = this; - if ( T_CHARGROUP == self.$parts[self.$level].type ) - { - if ( undef === start || undef === end ) return self; - var range = esc(start) + '-' + esc(end); - self.$parts[self.$level].part.push( range ); - } - return self; - }, - - alternate: function( ) { - var self = this; - self.$level++; - self.$parts.push({part: [], type: T_EITHER, flag: ''}); - return self; - }, - - group: function( ) { - var self = this; - self.$level++; - self.$parts.push({part: [], type: T_GROUP, flag: ''}); - return self; - }, - - nonCaptureGroup: function( ) { - var self = this; - self.$level++; - self.$parts.push({part: [], type: T_GROUP, flag: '?:'}); - return self; - }, - - lookAheadGroup: function( positive ) { - var self = this; - self.$level++; - self.$parts.push({part: [], type: T_GROUP, flag: (false===positive) ? '?!' : '?='}); - return self; - }, - - characterGroup: function( positive ) { - var self = this; - self.$level++; - self.$parts.push({part: [], type: T_CHARGROUP, flag: (false===positive) ? '^' : ''}); - return self; - }, - - end: function( ) { - var self = this, prev = self.$parts.pop() || {}, - type = prev.type, - flag = prev.flag || '', - part = prev.part || [], - level - ; - - if (0 < self.$level) - { - level = --self.$level; + dispose: function( ) { + var self = this; + self.$level = null; + self.$regex = null; + self.$parts = null; + return self; + }, + + reset: function( ) { + var self = this; + self.$level = 0; + self.$parts = [{part: [], type: T_SEQ, flag: ''}]; + return self; + }, + + compose: function( /* flags */ ) { + var self = this; + self.$regex = new RegExp(self.$parts[0].part.join(''), slice(arguments).join('')); + self.reset( ); + return self.$regex; + }, + + partial: function( reset ) { + var self = this, p = self.$parts[0].part.join(''); + if ( false!==reset ) self.reset( ); + return p; + }, + + repeat: function( min, max, greedy ) { + var self = this; + if ( undef === min ) return self; + var repeat = ( undef === max ) ? ('{'+min+'}') : ('{'+min+','+max+'}'); - if ( (T_EITHER|T_GROUP) & type ) - self.$parts[level].part.push('(' + flag + part.join('|') + ')'); + self.$parts[self.$level].part[self.$parts[self.$level].part.length-1] += (false===greedy) ? (repeat+'?') : repeat; + return self; + }, + + zeroOrOne: function( greedy ) { + var self = this; + self.$parts[self.$level].part[self.$parts[self.$level].part.length-1] += (false===greedy) ? '??' : '?'; + return self; + }, + + zeroOrMore: function( greedy ) { + var self = this; + self.$parts[self.$level].part[self.$parts[self.$level].part.length-1] += (false===greedy) ? '*?' : '*'; + return self; + }, + + oneOrMore: function( greedy ) { + var self = this; + self.$parts[self.$level].part[self.$parts[self.$level].part.length-1] += (false===greedy) ? '+?' : '+'; + return self; + }, + + sub: function( partialRegex, withParen ) { + var self = this; + if ( undef !== partialRegex ) + { + if ( withParen ) partialRegex = '(' + partialRegex + ')'; + self.$parts[self.$level].part.push( partialRegex ); + } + return self; + }, + + literal: function( literalStr, withParen ) { + var self = this; + if ( undef !== literalStr ) + { + literalStr = withParen ? ('(' + esc(literalStr) + ')') : esc(literalStr); + self.$parts[self.$level].part.push( literalStr ); + } + return self; + }, + + startOfInput: function( ) { + var self = this; + self.$parts[self.$level].part.push('^'); + return self; + }, + + endOfInput: function( ) { + var self = this; + self.$parts[self.$level].part.push('$'); + return self; + }, + + any: function( ) { + var self = this; + self.$parts[self.$level].part.push('.'); + return self; + }, + + space: function( positive ) { + var self = this; + self.$parts[self.$level].part.push((false===positive) ? '\\S' : '\\s'); + return self; + }, + + digit: function( positive ) { + var self = this; + self.$parts[self.$level].part.push((false===positive) ? '\\D' : '\\d'); + return self; + }, + + word: function( positive ) { + var self = this; + self.$parts[self.$level].part.push((false===positive) ? '\\W' : '\\w'); + return self; + }, + + boundary: function( positive ) { + var self = this; + self.$parts[self.$level].part.push((false===positive) ? '\\B' : '\\b'); + return self; + }, + + LF: function( ) { + var self = this; + self.$parts[self.$level].part.push('\\n'); + return self; + }, + + CR: function( ) { + var self = this; + self.$parts[self.$level].part.push('\\r'); + return self; + }, + + TAB: function( ) { + var self = this; + self.$parts[self.$level].part.push('\\t'); + return self; + }, + + CTRL: function( _char ) { + var self = this; + if ( _char ) self.$parts[self.$level].part.push('\\c'+_char); + return self; + }, + + backSpace: function( ) { + var self = this; + self.$parts[self.$level].part.push('[\\b]'); + return self; + }, + + backReference: function( n ) { + var self = this; + self.$parts[self.$level].part.push('\\'+parseInt(n, 10)); + return self; + }, + + characters: function( ) { + var self = this; + if ( T_CHARGROUP == self.$parts[self.$level].type ) + { + var chars = getArgs(arguments, 1).map(esc).join(''); + self.$parts[self.$level].part.push( chars ); + } + return self; + }, + + range: function( start, end ) { + var self = this; + if ( T_CHARGROUP == self.$parts[self.$level].type ) + { + if ( undef === start || undef === end ) return self; + var range = esc(start) + '-' + esc(end); + self.$parts[self.$level].part.push( range ); + } + return self; + }, + + alternate: function( ) { + var self = this; + self.$level++; + self.$parts.push({part: [], type: T_EITHER, flag: ''}); + return self; + }, + + group: function( ) { + var self = this; + self.$level++; + self.$parts.push({part: [], type: T_GROUP, flag: ''}); + return self; + }, + + nonCaptureGroup: function( ) { + var self = this; + self.$level++; + self.$parts.push({part: [], type: T_GROUP, flag: '?:'}); + return self; + }, + + lookAheadGroup: function( positive ) { + var self = this; + self.$level++; + self.$parts.push({part: [], type: T_GROUP, flag: (false===positive) ? '?!' : '?='}); + return self; + }, + + characterGroup: function( positive ) { + var self = this; + self.$level++; + self.$parts.push({part: [], type: T_CHARGROUP, flag: (false===positive) ? '^' : ''}); + return self; + }, + + end: function( ) { + var self = this, prev = self.$parts.pop() || {}, + type = prev.type, + flag = prev.flag || '', + part = prev.part || [], + level + ; - else if ( T_CHARGROUP & type ) - self.$parts[level].part.push('[' + flag + part.join('') + ']'); + if (0 < self.$level) + { + level = --self.$level; + + if ( (T_EITHER|T_GROUP) & type ) + self.$parts[level].part.push('(' + flag + part.join('|') + ')'); + + else if ( T_CHARGROUP & type ) + self.$parts[level].part.push('[' + flag + part.join('') + ']'); + } + return self; } - return self; - } -}; -// aliases -var CP = Composer.prototype; -CP.startOfLine = CP.startOfInput; -CP.endOfLine = CP.endOfInput; -CP.subRegex = CP.sub; -CP.lineFeed = CP.LF; -CP.carriageReturn = CP.CR; -CP.tabulate = CP.tab = CP.TAB; -CP.control = CP.CTRL; -CP.wordBoundary = CP.boundary; -CP.either = CP.alternate; -CP.subGroup = CP.group; -CP.nonCaptureSubGroup = CP.nonCaptureGroup; -CP.lookAheadSubGroup = CP.lookAheadGroup; + }; + // aliases + var CP = Composer.prototype; + CP.startOfLine = CP.startOfInput; + CP.endOfLine = CP.endOfInput; + CP.match = CP.literal; + CP.subRegex = CP.sub; + CP.lineFeed = CP.LF; + CP.carriageReturn = CP.CR; + CP.tabulate = CP.tab = CP.TAB; + CP.control = CP.CTRL; + CP.wordBoundary = CP.boundary; + CP.either = CP.alternate; + CP.chars = CP.characters; + CP.charGroup = CP.characterGroup; + CP.subGroup = CP.subgroup = CP.group; + CP.nonCaptureSubGroup = CP.nonCaptureGroup; + CP.lookAheadSubGroup = CP.lookAheadGroup; -exports['@@MODULE_NAME@@'] = Composer; + /* main code ends here */ + /* export the module */ + return Composer; +}); \ No newline at end of file diff --git a/src/js/regexcomposer.min.js b/src/js/regexcomposer.min.js new file mode 100644 index 0000000..a49a0cc --- /dev/null +++ b/src/js/regexcomposer.min.js @@ -0,0 +1,10 @@ +/** +* +* RegExComposer +* @version: 0.4.1 +* +* A simple and intuitive Regular Expression Composer for PHP, Python, Node/JS +* https://github.com/foo123/regex-analyzer +* +**/ +!function(r,t,e){"use strict";var n,a="object"==typeof module&&module.exports,p="function"==typeof define&&define.amd;a?module.exports=(module.$deps=module.$deps||{})[t]=module.$deps[t]||e.call(r,{NODE:module})||1:p&&"function"==typeof require&&"function"==typeof require.specified&&require.specified(t)?define(t,["require","exports","module"],function(t,n,a){return e.call(r,{AMD:a})}):t in r||(r[t]=n=e.call(r,{})||1)&&p&&define(t,[],function(){return n})}(this,"RegExComposer",function(r,t){"use strict";var e="0.4.1",n=Object.prototype,a=Array.prototype,p=(n.toString,function(r){return a.slice.apply(r,a.slice.call(arguments,1))}),u=function(r){return r.replace(/([.*+?^${}()|[\]\/\\\-])/g,"\\$1")},s=function(r){for(var t=[],e=0;e Matched +XYZ0 -> Matched ================ diff --git a/test/js/test.js b/test/js/test.js index 3e6e996..46d26f0 100644 --- a/test/js/test.js +++ b/test/js/test.js @@ -6,7 +6,7 @@ var echo = console.log; echo("Testing Composer"); echo("================"); -var Composer = require('../../build/js/regexcomposer.js'); +var Composer = require('../../src/js/regexcomposer.js'); var identifierSubRegex = new Composer( ) .characterGroup( ) @@ -57,7 +57,7 @@ echo(); echo("Testing Analyzer"); echo("================"); -var Analyzer = require('../../build/js/regexanalyzer.js'), +var Analyzer = require('../../src/js/regexanalyzer.js'), anal, peekChars, sampleStr, inregex = process.argv[2] || /xyz[abc0-9]*/gmi ;