diff --git a/ext/ragel/base_lexer.rl b/ext/ragel/base_lexer.rl index df81277..1aec342 100644 --- a/ext/ragel/base_lexer.rl +++ b/ext/ragel/base_lexer.rl @@ -82,6 +82,12 @@ callback("on_doctype_type", data, encoding, ts, te); }; + # Consumes everything between the [ and ]. Due to the use of :> the ] + # is not consumed by any+. + '[' any+ :> ']' => { + callback("on_doctype_inline", data, encoding, ts + 1, te - 1); + }; + # Lex the public/system IDs as regular strings. dquote => { fcall string_dquote; }; squote => { fcall string_squote; }; diff --git a/lib/oga/xml/doctype.rb b/lib/oga/xml/doctype.rb index c98d6f1..6b071d0 100644 --- a/lib/oga/xml/doctype.rb +++ b/lib/oga/xml/doctype.rb @@ -19,8 +19,12 @@ module Oga # The system ID of the doctype. # @return [String] # + # @!attribute [rw] inline_rules + # The inline doctype rules. + # @return [String] + # class Doctype - attr_accessor :name, :type, :public_id, :system_id + attr_accessor :name, :type, :public_id, :system_id, :inline_rules ## # @example @@ -50,6 +54,7 @@ module Oga segments << " #{type}" if type segments << %Q{ "#{public_id}"} if public_id segments << %Q{ "#{system_id}"} if system_id + segments << " [#{inline_rules}]" if inline_rules return segments + '>' end @@ -70,6 +75,7 @@ module Oga #{spacing} type: #{type.inspect} #{spacing} public_id: #{public_id.inspect} #{spacing} system_id: #{system_id.inspect} +#{spacing} inline_rules: #{inline_rules.inspect} #{spacing}) EOF end diff --git a/lib/oga/xml/lexer.rb b/lib/oga/xml/lexer.rb index 78daf6b..37a6774 100644 --- a/lib/oga/xml/lexer.rb +++ b/lib/oga/xml/lexer.rb @@ -194,6 +194,15 @@ module Oga add_token(:T_DOCTYPE_END) end + ## + # Called on an inline doctype block. + # + # @param [String] value + # + def on_doctype_inline(value) + add_token(:T_DOCTYPE_INLINE, value) + end + ## # Called on the start of a CDATA tag. 
# diff --git a/lib/oga/xml/parser.y b/lib/oga/xml/parser.y index 66687f5..cfad791 100644 --- a/lib/oga/xml/parser.y +++ b/lib/oga/xml/parser.y @@ -11,6 +11,7 @@ class Oga::XML::Parser token T_STRING T_TEXT token T_DOCTYPE_START T_DOCTYPE_END T_DOCTYPE_TYPE T_DOCTYPE_NAME +token T_DOCTYPE_INLINE token T_CDATA_START T_CDATA_END token T_COMMENT_START T_COMMENT_END token T_ELEM_START T_ELEM_NAME T_ELEM_NS T_ELEM_END T_ATTR @@ -45,25 +46,36 @@ rule # : T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_END { - on_doctype(val[1]) + on_doctype(:name => val[1]) } # | T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_DOCTYPE_END { - on_doctype(val[1], val[2]) + on_doctype(:name => val[1], :type => val[2]) } # | T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_STRING T_DOCTYPE_END { - on_doctype(val[1], val[2], val[3]) + on_doctype(:name => val[1], :type => val[2], :public_id => val[3]) } # | T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_TYPE T_STRING T_STRING T_DOCTYPE_END { - on_doctype(val[1], val[2], val[3], val[4]) + on_doctype( + :name => val[1], + :type => val[2], + :public_id => val[3], + :system_id => val[4] + ) + } + + # + | T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_INLINE T_DOCTYPE_END + { + on_doctype(:name => val[1], :inline_rules => val[2]) } ; @@ -270,18 +282,10 @@ Unexpected #{name} with value #{value.inspect} on line #{@line}: end ## - # @param [String] name - # @param [String] type - # @param [String] public_id - # @param [String] system_id + # @param [Hash] options # - def on_doctype(name, type = nil, public_id = nil, system_id = nil) - return Doctype.new( - :name => name, - :type => type, - :public_id => public_id, - :system_id => system_id - ) + def on_doctype(options = {}) + return Doctype.new(options) end ## diff --git a/spec/oga/xml/doctype_spec.rb b/spec/oga/xml/doctype_spec.rb index 2e0f433..6eaf96d 100644 --- a/spec/oga/xml/doctype_spec.rb +++ b/spec/oga/xml/doctype_spec.rb @@ -45,11 +45,24 @@ describe Oga::XML::Doctype do instance.to_xml.should == '' end + + example 
'include the inline rules if present' do + instance = described_class.new( + :name => 'html', + :inline_rules => '' + ) + + instance.to_xml.should == ']>' + end end context '#inspect' do before do - @instance = described_class.new(:name => 'html', :type => 'PUBLIC') + @instance = described_class.new( + :name => 'html', + :type => 'PUBLIC', + :inline_rules => '' + ) end example 'pretty-print the node' do @@ -59,6 +72,7 @@ Doctype( type: "PUBLIC" public_id: nil system_id: nil + inline_rules: "" ) EOF end diff --git a/spec/oga/xml/document_spec.rb b/spec/oga/xml/document_spec.rb index c0e160f..2aa8ff8 100644 --- a/spec/oga/xml/document_spec.rb +++ b/spec/oga/xml/document_spec.rb @@ -99,6 +99,7 @@ Document( type: nil public_id: nil system_id: nil + inline_rules: nil ) xml_declaration: XmlDeclaration( version: "1.0" diff --git a/spec/oga/xml/lexer/doctype_spec.rb b/spec/oga/xml/lexer/doctype_spec.rb index f4dc872..89a9bb0 100644 --- a/spec/oga/xml/lexer/doctype_spec.rb +++ b/spec/oga/xml/lexer/doctype_spec.rb @@ -31,5 +31,26 @@ describe Oga::XML::Lexer do [:T_DOCTYPE_END, nil, 1] ] end + + example 'lex an inline doctype' do + lex(']>').should == [ + [:T_DOCTYPE_START, nil, 1], + [:T_DOCTYPE_NAME, 'html', 1], + [:T_DOCTYPE_INLINE, '', 1], + [:T_DOCTYPE_END, nil, 1] + ] + end + + # Technically not valid, put in place to make sure that the Ragel rules are + # not too greedy. 
+ example 'lex an inline doctype followed by a system ID' do + lex('] "foo">').should == [ + [:T_DOCTYPE_START, nil, 1], + [:T_DOCTYPE_NAME, 'html', 1], + [:T_DOCTYPE_INLINE, '', 1], + [:T_STRING, 'foo', 1], + [:T_DOCTYPE_END, nil, 1] + ] + end end end diff --git a/spec/oga/xml/parser/doctype_spec.rb b/spec/oga/xml/parser/doctype_spec.rb index 3ebe957..bf0ef03 100644 --- a/spec/oga/xml/parser/doctype_spec.rb +++ b/spec/oga/xml/parser/doctype_spec.rb @@ -80,4 +80,18 @@ describe Oga::XML::Parser do @document.doctype.system_id.should == 'bar' end end + + context 'doctypes with inline rules' do + before :all do + @document = parse(']>') + end + + example 'return a Doctype instance' do + @document.doctype.is_a?(Oga::XML::Doctype).should == true + end + + example 'set the inline doctype rules' do + @document.doctype.inline_rules.should == '' + end + end end