Skip to content

Commit

Permalink
Merge pull request rails#40883 from theojulienne/fsm-include-constrainted-routes
Browse files Browse the repository at this point in the history

actionpack: Improve performance by allowing routes with custom regexes in the FSM.
  • Loading branch information
jhawthorn authored Jan 5, 2021
2 parents 5facfbf + c67c764 commit 968de9c
Show file tree
Hide file tree
Showing 12 changed files with 206 additions and 67 deletions.
23 changes: 11 additions & 12 deletions actionpack/lib/action_dispatch/journey/gtg/builder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ module ActionDispatch
module Journey # :nodoc:
module GTG # :nodoc:
class Builder # :nodoc:
DUMMY = Nodes::Dummy.new
DUMMY_END_NODE = Nodes::Dummy.new

attr_reader :root, :ast, :endpoints

# Stores +root+, wraps it in a Cat node whose right-hand side is a shared
# dummy node, and builds the followpos table.
def initialize(root)
@root = root
# NOTE(review): the two @ast assignments below look like the before/after
# lines of the diff shown together; as written, only the second value
# (using DUMMY_END_NODE) is kept.
@ast = Nodes::Cat.new root, DUMMY
@ast = Nodes::Cat.new root, DUMMY_END_NODE
@followpos = build_followpos
end

Expand All @@ -28,12 +28,12 @@ def transition_table
marked[s] = true # mark s

s.group_by { |state| symbol(state) }.each do |sym, ps|
u = ps.flat_map { |l| @followpos[l] }
u = ps.flat_map { |l| @followpos[l] }.uniq
next if u.empty?

from = state_id[s]

if u.all? { |pos| pos == DUMMY }
if u.all? { |pos| pos == DUMMY_END_NODE }
to = state_id[Object.new]
dtrans[from, to] = sym
dtrans.add_accepting(to)
Expand All @@ -43,9 +43,9 @@ def transition_table
to = state_id[u]
dtrans[from, to] = sym

if u.include?(DUMMY)
if u.include?(DUMMY_END_NODE)
ps.each do |state|
if @followpos[state].include?(DUMMY)
if @followpos[state].include?(DUMMY_END_NODE)
dtrans.add_memo(to, state.memo)
end
end
Expand All @@ -66,7 +66,10 @@ def nullable?(node)
when Nodes::Group
true
when Nodes::Star
true
# the default star regex is /(.+)/ which is NOT nullable
# but since different constraints can be provided we must
# actually check if this is the case or not.
node.regexp.match?("")
when Nodes::Or
node.children.any? { |c| nullable?(c) }
when Nodes::Cat
Expand Down Expand Up @@ -104,7 +107,7 @@ def firstpos(node)
def lastpos(node)
case node
when Nodes::Star
firstpos(node.left)
lastpos(node.left)
when Nodes::Or
node.children.flat_map { |c| lastpos(c) }.tap(&:uniq!)
when Nodes::Cat
Expand All @@ -131,10 +134,6 @@ def build_followpos
lastpos(n.left).each do |i|
table[i] += firstpos(n.right)
end
when Nodes::Star
lastpos(n).each do |i|
table[i] += firstpos(n)
end
end
end
table
Expand Down
14 changes: 10 additions & 4 deletions actionpack/lib/action_dispatch/journey/gtg/simulator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def initialize(memos)
end

class Simulator # :nodoc:
INITIAL_STATE = [0].freeze
INITIAL_STATE = [ [0, nil] ].freeze

attr_reader :tt

Expand All @@ -25,13 +25,19 @@ def initialize(transition_table)
def memos(string)
input = StringScanner.new(string)
state = INITIAL_STATE
start_index = 0

while sym = input.scan(%r([/.?]|[^/.?]+))
state = tt.move(state, sym)
end_index = start_index + sym.length

state = tt.move(state, string, start_index, end_index)

start_index = end_index
end

acceptance_states = state.each_with_object([]) do |s, memos|
memos.concat(tt.memo(s)) if tt.accepting?(s)
acceptance_states = state.each_with_object([]) do |s_d, memos|
s, idx = s_d
memos.concat(tt.memo(s)) if idx.nil? && tt.accepting?(s)
end

acceptance_states.empty? ? yield : acceptance_states
Expand Down
84 changes: 66 additions & 18 deletions actionpack/lib/action_dispatch/journey/gtg/transition_table.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,15 @@ class TransitionTable # :nodoc:

attr_reader :memos

DEFAULT_EXP = /[^.\/?]+/
DEFAULT_EXP_ANCHORED = Regexp.new(/\A#{DEFAULT_EXP}\Z/)

# Starts with empty tables: one transition map per kind of symbol (default
# parameter regexp, any other regexp, literal string), the set of accepting
# states, and a memo list per state.
def initialize
# NOTE(review): the first four assignments appear to be the pre-change lines
# of the diff; each is assigned again below, so only the later values remain.
@regexp_states = {}
@string_states = {}
@accepting = {}
@memos = Hash.new { |h, k| h[k] = [] }
@stdparam_states = {}
@regexp_states = {}
@string_states = {}
@accepting = {}
# The block form gives every missing key its own fresh array.
@memos = Hash.new { |h, k| h[k] = [] }
end

def add_accepting(state)
Expand All @@ -41,22 +45,54 @@ def eclosure(t)
Array(t)
end

def move(t, a)
def move(t, full_string, start_index, end_index)
return [] if t.empty?

regexps = []
strings = []
next_states = []

t.each { |s|
if states = @regexp_states[s]
states.each { |re, v| regexps << v if re.match?(a) && !v.nil? }
tok = full_string.slice(start_index, end_index - start_index)
token_matches_default_component = DEFAULT_EXP_ANCHORED.match?(tok)

t.each { |s, previous_start|
if previous_start.nil?
# In the simple case of a "default" param regex do this fast-path
# and add all next states.
if token_matches_default_component && states = @stdparam_states[s]
states.each { |re, v| next_states << [v, nil].freeze if !v.nil? }
end

# When we have a literal string, we can just pull the next state
if states = @string_states[s]
next_states << [states[tok], nil].freeze unless states[tok].nil?
end
end

if states = @string_states[s]
strings << states[a] unless states[a].nil?
# For regexes that aren't the "default" style, they may potentially
# not be terminated by the first "token" [./?], so we need to continue
# to attempt to match this regexp as well as any successful paths that
# continue out of it. both paths could be valid.
if states = @regexp_states[s]
slice_start = if previous_start.nil?
start_index
else
previous_start
end

slice_length = end_index - slice_start
curr_slice = full_string.slice(slice_start, slice_length)

states.each { |re, v|
# if we match, we can try moving past this
next_states << [v, nil].freeze if !v.nil? && re.match?(curr_slice)
}

# and regardless, we must continue accepting tokens and retrying this regexp.
# we need to remember where we started as well so we can take bigger slices.
next_states << [s, slice_start].freeze
end
}
strings.concat regexps

next_states
end

def as_json(options = nil)
Expand All @@ -69,9 +105,10 @@ def as_json(options = nil)
end

{
regexp_states: simple_regexp,
string_states: @string_states,
accepting: @accepting
regexp_states: simple_regexp,
string_states: @string_states,
stdparam_states: @stdparam_states,
accepting: @accepting
}
end

Expand Down Expand Up @@ -125,18 +162,25 @@ def visualizer(paths, title = "FSM")

# Records a transition from state +from+ to state +to+ on +sym+.
#
# The table that receives the entry is chosen by states_hash_for from the
# symbol exactly as it was passed in. A Regexp is wrapped in \A...\Z only
# afterwards, so the stored key has to cover a whole token when it is matched.
def []=(from, to, sym)
  table = states_hash_for(sym)
  outgoing = (table[from] ||= {})

  # Strings are stored untouched; regexps are stored in their anchored form.
  key = sym.is_a?(Regexp) ? Regexp.new(/\A#{sym}\Z/) : sym
  outgoing[key] = to
end

# Lists every distinct state referenced by the table: the source states
# (outer hash keys) and the destination states (inner hash values) of each
# transition map.
def states
ss = @string_states.keys + @string_states.values.flat_map(&:values)
ps = @stdparam_states.keys + @stdparam_states.values.flat_map(&:values)
rs = @regexp_states.keys + @regexp_states.values.flat_map(&:values)
# NOTE(review): the value of the next line is discarded; it looks like the
# pre-change line of the diff. Only the final expression is returned.
(ss + rs).uniq
(ss + ps + rs).uniq
end

def transitions
@string_states.flat_map { |from, hash|
hash.map { |s, to| [from, s, to] }
} + @stdparam_states.flat_map { |from, hash|
hash.map { |s, to| [from, s, to] }
} + @regexp_states.flat_map { |from, hash|
hash.map { |s, to| [from, s, to] }
}
Expand All @@ -148,7 +192,11 @@ def states_hash_for(sym)
when String
@string_states
when Regexp
@regexp_states
if sym == DEFAULT_EXP
@stdparam_states
else
@regexp_states
end
else
raise ArgumentError, "unknown symbol: %s" % sym.class
end
Expand Down
9 changes: 9 additions & 0 deletions actionpack/lib/action_dispatch/journey/nodes/node.rb
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,15 @@ def group?; true; end
end

class Star < Unary # :nodoc:
attr_accessor :regexp

def initialize(left)
  # Hand +left+ to the parent initializer unchanged.
  super

  # Unless a requirement replaces it, a wildcard segment uses a lazy pattern
  # that needs at least one character.
  @regexp = /.+?/
end

def star?; true; end
def type; :STAR; end

Expand Down
21 changes: 21 additions & 0 deletions actionpack/lib/action_dispatch/journey/path/pattern.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,27 @@ def ast
@spec
end

# Reports whether the pattern's parameters can be matched one URL chunk at a
# time.
#
# Walks the AST's terminal nodes in order. For every symbol terminal after the
# first position it answers false when
# * the symbol's requirement is an Array (only regexp constraints are handled
#   so far), or
# * the terminal directly before or directly after the symbol is a literal.
# Answers true when no symbol trips either rule.
def requirements_anchored?
  terminals = ast.find_all { |node| node.is_a?(Nodes::Terminal) }

  # Pairing each terminal with its predecessor never visits position 0, so
  # the very first terminal is not checked.
  terminals.each_cons(2).with_index(1) do |(previous, current), position|
    next unless current.symbol?

    # constraints must be regexps; an Array requirement is not supported yet
    return false if current.regexp.is_a?(Array)
    return false if previous.literal?

    following = terminals[position + 1]
    return false if following&.literal?
  end

  true
end

# Names of the symbol nodes found in +spec+, in the order find_all yields
# them. Computed on the first call and cached in @names.
def names
  @names ||= begin
    symbol_nodes = spec.find_all { |node| node.symbol? }
    symbol_nodes.map { |node| node.name }
  end
end
Expand Down
2 changes: 2 additions & 0 deletions actionpack/lib/action_dispatch/journey/route.rb
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ def ast
@decorated_ast ||= begin
decorated_ast = path.ast
decorated_ast.find_all(&:terminal?).each { |n| n.memo = self }
# inject any regexp requirements for `star` nodes so they can be determined nullable, which requires knowing if the regex accepts an empty string.
decorated_ast.find_all(&:star?).each { |n| n.regexp = path.requirements[n.name.to_sym] unless path.requirements[n.name.to_sym].nil? }
decorated_ast
end
end
Expand Down
2 changes: 1 addition & 1 deletion actionpack/lib/action_dispatch/journey/router.rb
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def visualizer
private
# Splits +routes+ into two arrays with Enumerable#partition: routes for which
# the block is truthy come first, all others second.
def partitioned_routes
routes.partition { |r|
# NOTE(review): the result of the first expression is discarded; it looks
# like the pre-change line of the diff. Only the second expression (anchored
# path whose requirements are anchored) decides which group a route joins.
r.path.anchored && r.ast.grep(Nodes::Symbol).all? { |n| n.default_regexp? }
r.path.anchored && r.path.requirements_anchored?
}
end

Expand Down
2 changes: 1 addition & 1 deletion actionpack/lib/action_dispatch/journey/routes.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def clear
end

def partition_route(route)
if route.path.anchored && route.ast.grep(Nodes::Symbol).all?(&:default_regexp?)
if route.path.anchored && route.path.requirements_anchored?
anchored_routes << route
else
custom_routes << route
Expand Down
Loading

0 comments on commit 968de9c

Please sign in to comment.