diff --git a/src/dfa.jl b/src/dfa.jl index 69f52ac2..fadb58f2 100644 --- a/src/dfa.jl +++ b/src/dfa.jl @@ -21,11 +21,11 @@ function nfa2dfa(nfa::NFA) S = epsilon_closure(Set([nfa.start])) start = new_dfanode(S) dfanodes = Dict([S => start]) - unvisited = Set([S]) + unvisited = [S] while !isempty(unvisited) S = pop!(unvisited) S_actions = accumulate_actions(S) - for l in 0x00:0xff + for l in keyrange(S) T = epsilon_closure(move(S, l)) if isempty(T) continue @@ -35,6 +35,9 @@ function nfa2dfa(nfa::NFA) end actions = Set{Action}() for s in S + if !haskey(s.trans, l) + continue + end T′ = s.trans[l] for t in T′ union!(actions, s.actions[(l, t)]) @@ -52,17 +55,31 @@ function nfa2dfa(nfa::NFA) return DFA(start) end +function keyrange(S::Set{NFANode}) + lo = 0xff + hi = 0x00 + for s in S + for l in bytekeys(s.trans) + lo = min(l, lo) + hi = max(l, hi) + end + end + return lo:hi +end + function move(S::Set{NFANode}, label::UInt8) T = Set{NFANode}() for s in S - union!(T, s.trans[label]) + if haskey(s.trans, label) + union!(T, s.trans[label]) + end end return T end function epsilon_closure(S::Set{NFANode}) closure = Set{NFANode}() - unvisited = Set(copy(S)) + unvisited = collect(S) while !isempty(unvisited) s = pop!(unvisited) push!(closure, s) @@ -105,7 +122,7 @@ function reduce_states(dfa::DFA) new_dfanode(s) = DFANode(Dict(), Set{Action}(), s.final, Set{NFANode}()) start = new_dfanode(dfa.start) S_start = equivalent(dfa.start) - dfanodes = Dict([S_start => start]) + dfanodes = Dict(S_start => start) unvisited = [(S_start, start)] while !isempty(unvisited) S, s′ = pop!(unvisited) diff --git a/src/nfa.jl b/src/nfa.jl index 1f920ebe..2753b8a8 100644 --- a/src/nfa.jl +++ b/src/nfa.jl @@ -39,6 +39,14 @@ function NFATransition() return NFATransition(trans, trans_eps) end +function bytekeys(trans::NFATransition) + return keys(trans.trans) +end + +function Base.haskey(trans::NFATransition, label::UInt8) + return haskey(trans.trans, label) +end + function Base.getindex(trans::NFATransition, label::UInt8) return trans.trans[label] end @@ -154,6 +162,18 @@ function re2nfa_rec(re::RegExp.RE, actions::Dict{Symbol,Action}) for b in re.args[1] addtrans!(start, b => final) end + elseif re.head == :bytes + if isempty(re.args) + addtrans!(start, :eps => final) + else + node = start + for b::UInt8 in re.args + next = NFANode() + addtrans!(node, b => next) + node = next + end + final = node + end elseif re.head == :cat lastnfa = NFA(start, final) addtrans!(start, :eps => final) diff --git a/src/re.jl b/src/re.jl index 111f9c95..5481e24d 100644 --- a/src/re.jl +++ b/src/re.jl @@ -50,7 +50,7 @@ function primitive(str::String) end function primitive(bs::Vector{UInt8}) - return RE(:bytes, [bs]) + return RE(:bytes, copy(bs)) end function primitive(x::Primitive, actions) @@ -325,9 +325,9 @@ function expand(re::RE) return expand(primitive(string(char), re.actions)) end elseif re.head == :str - return expand(primitive(convert(Vector{UInt8}, re.args[1]), re.actions)) + return RE(:bytes, convert(Vector{UInt8}, re.args[1]), re.actions) elseif re.head == :bytes - return RE(:cat, [primitive(ByteSet([b])) for b in re.args[1]], re.actions) + return re else @assert re.head ∉ PRIMITIVE return RE(re.head, [expand(arg) for arg in re.args], re.actions)