From 7e5e2fc262c1d93ebb5a61a310400ff395ac7759 Mon Sep 17 00:00:00 2001 From: TSUYUSATO Kitsune Date: Wed, 19 Jun 2024 11:21:43 +0900 Subject: [PATCH] Fix to handle unclosed doctype decls in `BaseParser` See https://github.com/ruby/rexml/pull/153#discussion_r1645228365 --- lib/rexml/parsers/baseparser.rb | 10 +++++++++- lib/rexml/parsers/treeparser.rb | 12 ------------ test/parse/test_document_type_declaration.rb | 14 ++++++-------- 3 files changed, 15 insertions(+), 21 deletions(-) diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 272d8a6b..5791ab1d 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -216,7 +216,12 @@ def pull_event x, @closed = @closed, nil return [ :end_element, x ] end - return [ :end_document ] if empty? + if empty? + if @document_status == :in_doctype + raise ParseException.new("Malformed DOCTYPE: unclosed", @source) + end + return [ :end_document ] + end return @stack.shift if @stack.size > 0 #STDERR.puts @source.encoding #STDERR.puts "BUFFER = #{@source.buffer.inspect}" @@ -373,6 +378,9 @@ def pull_event @document_status = :after_doctype return [ :end_doctype ] end + if @document_status == :in_doctype + raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source) + end end if @document_status == :after_doctype @source.match(/\s*/um, true) diff --git a/lib/rexml/parsers/treeparser.rb b/lib/rexml/parsers/treeparser.rb index 9ddb54db..04dddea9 100644 --- a/lib/rexml/parsers/treeparser.rb +++ b/lib/rexml/parsers/treeparser.rb @@ -24,20 +24,12 @@ def parse #STDERR.puts "TREEPARSER GOT #{event.inspect}" case event[0] when :end_document - if in_doctype - raise ParseException.new("Malformed DOCTYPE: unclosed", - @parser.source, @parser) - end unless tag_stack.empty? raise ParseException.new("No close tag for #{@build_context.xpath}", @parser.source, @parser) end return when :start_element - if in_doctype - raise ParseException.new("Malformed DOCTYPE: unclosed", - @parser.source, @parser) - end tag_stack.push(event[1]) el = @build_context = @build_context.add_element( event[1] ) event[2].each do |key, value| @@ -47,10 +39,6 @@ def parse tag_stack.pop @build_context = @build_context.parent when :text - if in_doctype - raise ParseException.new("Malformed DOCTYPE: unclosed", - @parser.source, @parser) - end if @build_context[-1].instance_of? Text @build_context[-1] << event[1] else diff --git a/test/parse/test_document_type_declaration.rb b/test/parse/test_document_type_declaration.rb index 8a726476..3ca0b536 100644 --- a/test/parse/test_document_type_declaration.rb +++ b/test/parse/test_document_type_declaration.rb @@ -56,14 +56,12 @@ def test_no_name class TestUnclosed < self def test_no_extra_node exception = assert_raise(REXML::ParseException) do - REXML::Document.new(<<~DOCTYPE) - #{' '} DETAIL end @@ -91,11 +89,11 @@ def test_text DOCTYPE end assert_equal(<<~DETAIL.chomp, exception.to_s) - Malformed DOCTYPE: unclosed + Malformed DOCTYPE: invalid declaration Line: 1 Position: 21 Last 80 unconsumed characters: - + text#{' '} DETAIL end end