diff --git a/lib/docs/filters/threejs/clean_html.rb b/lib/docs/filters/threejs/clean_html.rb index abee6997c1..9b0de32445 100644 --- a/lib/docs/filters/threejs/clean_html.rb +++ b/lib/docs/filters/threejs/clean_html.rb @@ -1,243 +1,238 @@ module Docs class Threejs class CleanHtmlFilter < Filter + PATTERNS = { + method_this: /\[method:this\s+([^\]]+)\]\s*\((.*?)\)/, + method_return: /\[method:([^\s\]]+)\s+([^\]]+)\]\s*\((.*?)\)/, + method_no_params: /\[method:([^\s\]]+)\s+([^\]]+)\](?!\()/, + property: /\[property:([^\]]+?)\s+([^\]]+?)\]/, + example_link: /\[example:([^\s\]]+)\s+([^\]]+)\]/, + external_link_text: /\[link:([^\s\]]+)\s+([^\]]+)\]/, + external_link: /\[link:([^\]]+)\]/, + page_link_text: /\[page:([^\]]+?)\s+([^\]]+?)\]/, + page_link: /\[page:([^\]]+?)\]/, + inline_code: /`([^`]+)`/, + name_placeholder: /\[name\]/, + constructor_param: /\[param:([^\]]+?)\s+([^\]]+?)\]/ + }.freeze + def call - # Remove unnecessary elements + remove_unnecessary_elements + wrap_code_blocks + process_sections + format_links + add_section_structure + format_notes + add_heading_attributes + doc + end + + private + + def remove_unnecessary_elements css('head, script, style').remove - - # Wrap code blocks with pre tags and add syntax highlighting + end + + def wrap_code_blocks css('code').each do |node| - unless node.parent.name == 'pre' - pre = node.wrap('
') - pre['data-language'] = 'javascript' - pre['class'] = 'language-javascript' - end + next if node.parent.name == 'pre' + pre = node.wrap('') + pre['data-language'] = pre['class'] = 'language-javascript' end - + end + + def process_sections # Handle source links css('h2').each do |node| - if node.content.strip == 'Source' - content = node.next_element&.inner_html - if content - # Clean up any existing formatting - content = content.gsub(/<[^>]+>/, '') - # Extract the path from the content - if content =~ /src\/(.*?)\.js/ - path = "/#{$1}.js" - formatted_link = %Q(src#{path}) - node.next_element.inner_html = formatted_link if node.next_element - end - end - end + next unless node.content.strip == 'Source' + handle_source_link(node) end - # Handle method signatures + # Handle method signatures and properties css('h3').each do |node| content = node.inner_html - - # Handle [method:this methodName]( param1, param2, ... ) format - content = content.gsub(/\[method:this\s+([^\]]+)\]\s*\((.*?)\)/) do |match| - method_name, params_str = $1, $2 - - # Format parameters - params = params_str.split(',').map do |param| - param = param.strip - if param.include?(' ') - type, name = param.split(' ', 2).map(&:strip) - "#{type} #{name}" - else - "#{param}" - end - end.join(", ") - - "
#{name}
"
- end
-
+ content = handle_name_placeholders(content)
+ content = format_constructor_params(content)
node.inner_html = content
end
+ end
- # Clean up property formatting
- css('h3').each do |node|
- node.inner_html = node.inner_html.gsub(/\[property:([^\]]+?)\s+([^\]]+?)\]/) do |match|
- type, name = $1, $2
- "#{name}
"
+ end
+ end
+
+ def format_links
css('*').each do |node|
next if node.text?
- # Handle example links [example:tag Title]
- node.inner_html = node.inner_html.gsub(/\[example:([^\s\]]+)\s+([^\]]+)\]/) do |match|
- tag, title = $1, $2
- "#{title}"
- end
-
- # Handle external links with [link:url text] format
- node.inner_html = node.inner_html.gsub(/\[link:([^\s\]]+)\s+([^\]]+)\]/) do |match|
- url, text = $1, $2
- "#{text}"
- end
+ content = node.inner_html
+ .gsub(PATTERNS[:example_link]) { create_external_link("https://threejs.org/examples/##{$1}", $2) }
+ .gsub(PATTERNS[:external_link_text]) { create_external_link($1, $2) }
+ .gsub(PATTERNS[:external_link]) { create_external_link($1, $1) }
+ .gsub(PATTERNS[:page_link_text]) { create_internal_link($1, $2) }
+ .gsub(PATTERNS[:page_link]) { create_internal_link($1, $1) }
+
+ node.inner_html = content
+ end
- # Handle external links with [link:url] format
- node.inner_html = node.inner_html.gsub(/\[link:([^\]]+)\]/) do |match|
- url = $1
- "#{url}"
- end
+ normalize_href_attributes
+ end
- # Handle internal page links with text
- node.inner_html = node.inner_html.gsub(/\[page:([^\]]+?)\s+([^\]]+?)\]/) do
- path, text = $1, $2
- "#{text}
"
- end
+ def create_external_link(url, text)
+ %Q(#{text})
+ end
- # Handle internal page links without text
- node.inner_html = node.inner_html.gsub(/\[page:([^\]]+?)\]/) do |match|
- path = $1
- "#{path}
"
- end
- end
+ def create_internal_link(path, text)
+ %Q(#{text}
)
+ end
- # Fix all href attributes to be lowercase and remove .html
+ def normalize_href_attributes
css('a[href]').each do |link|
next if link['href'].start_with?('http')
link['href'] = link['href'].remove('../').downcase.sub(/\.html$/, '')
link['class'] = 'reference internal'
end
+ end
- # Add section classes
+ def add_section_structure
css('h2').each do |node|
node['class'] = 'section-title'
section = node.next_element
- if section
- wrapper = doc.document.create_element('div')
- wrapper['class'] = 'section'
- node.after(wrapper)
- wrapper.add_child(node)
- current = section
- while current && current.name != 'h2'
- next_el = current.next
- wrapper.add_child(current)
- current = next_el
- end
+ next unless section
+
+ wrapper = doc.document.create_element('div')
+ wrapper['class'] = 'section'
+ node.after(wrapper)
+ wrapper.add_child(node)
+
+ current = section
+ while current && current.name != 'h2'
+ next_el = current.next
+ wrapper.add_child(current)
+ current = next_el
end
end
- # Format description paragraphs
- css('p.desc').each do |node|
- node['class'] = 'section-desc'
- end
+ css('p.desc').each { |node| node['class'] = 'section-desc' }
+ end
- # Handle inline code/backticks in text
- css('p, li, dt, dd').each do |node|
- next if node.at_css('pre') # Skip if contains a code block
+ def format_notes
+ css('p').each do |node|
+ next unless node.content.start_with?('Note:')
- # Replace backticks with proper code formatting
- node.inner_html = node.inner_html.gsub(/`([^`]+)`/) do |match|
- code = $1
- "#{code}
"
- end
+ wrapper = doc.document.create_element('div')
+ wrapper['class'] = 'admonition note'
+
+ title = doc.document.create_element('p')
+ title['class'] = 'first admonition-title'
+ title.content = 'Note'
+
+ content = doc.document.create_element('p')
+ content['class'] = 'last'
+ content.inner_html = node.inner_html.sub('Note:', '').strip
+
+ wrapper.add_child(title)
+ wrapper.add_child(content)
+ node.replace(wrapper)
end
+ end
- # Handle inline code in property descriptions
- css('.property-type').each do |node|
- node.inner_html = node.inner_html.gsub(/`([^`]+)`/) do |match|
- code = $1
- "#{code}
"
- end
- end
-
- # Add proper heading IDs and classes
+ def add_heading_attributes
css('h1, h2, h3, h4').each do |node|
node['id'] ||= node.content.strip.downcase.gsub(/[^\w]+/, '-')
existing_class = node['class'].to_s
node['class'] = "#{existing_class} section-header"
end
- # Add note styling
- css('p').each do |node|
- if node.content.start_with?('Note:')
- wrapper = doc.document.create_element('div')
- wrapper['class'] = 'admonition note'
-
- title = doc.document.create_element('p')
- title['class'] = 'first admonition-title'
- title.content = 'Note'
-
- content = doc.document.create_element('p')
- content['class'] = 'last'
- content.inner_html = node.inner_html.sub('Note:', '').strip
-
- wrapper.add_child(title)
- wrapper.add_child(content)
- node.replace(wrapper)
+ format_inline_code
+ end
+
+ def format_inline_code
+ selectors = ['p', 'li', 'dt', 'dd', '.property-type'].join(', ')
+ css(selectors).each do |node|
+ next if node.at_css('pre')
+ node.inner_html = node.inner_html.gsub(PATTERNS[:inline_code]) do |match|
+ "#{$1}
"
end
end
- doc
end
end
end