@@ -122,7 +122,7 @@ def test_simple_html(self):
122122<Img sRc='Bar' isMAP>sample
123123text
124124&#x201C;
125- <!--comment2a-- --comment2b--><!>
125+ <!--comment2a-- --comment2b-->
126126</Html>
127127""" , [
128128 ("data" , "\n " ),
@@ -157,24 +157,6 @@ def test_unclosed_entityref(self):
157157 ("data" , " foo" ),
158158 ])
159159
160- def test_doctype_decl (self ):
161- inside = """\
162- DOCTYPE html [
163- <!ELEMENT html - O EMPTY>
164- <!ATTLIST html
165- version CDATA #IMPLIED
166- profile CDATA 'DublinCore'>
167- <!NOTATION datatype SYSTEM 'http://xml.python.org/notations/python-module'>
168- <!ENTITY myEntity 'internal parsed entity'>
169- <!ENTITY anEntity SYSTEM 'http://xml.python.org/entities/something.xml'>
170- <!ENTITY % paramEntity 'name|name|name'>
171- %paramEntity;
172- <!-- comment -->
173- ]"""
174- self ._run_check ("<!%s>" % inside , [
175- ("decl" , inside ),
176- ])
177-
178160 def test_bad_nesting (self ):
179161 # Strangely, this *is* supposed to test that overlapping
180162 # elements are allowed. HTMLParser is more geared toward
@@ -247,6 +229,30 @@ def test_starttag_junk_chars(self):
247229 self ._parse_error ("<a foo='>'" )
248230 self ._parse_error ("<a foo='>" )
249231
232+ def test_valid_doctypes (self ):
233+ # from http://www.w3.org/QA/2002/04/valid-dtd-list.html
234+ dtds = ['HTML' , # HTML5 doctype
235+ ('HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
236+ '"http://www.w3.org/TR/html4/strict.dtd"' ),
237+ ('HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" '
238+ '"http://www.w3.org/TR/html4/loose.dtd"' ),
239+ ('html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" '
240+ '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"' ),
241+ ('html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" '
242+ '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"' ),
243+ ('math PUBLIC "-//W3C//DTD MathML 2.0//EN" '
244+ '"http://www.w3.org/Math/DTD/mathml2/mathml2.dtd"' ),
245+ ('html PUBLIC "-//W3C//DTD '
246+ 'XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN" '
247+ '"http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd"' ),
248+ ('svg PUBLIC "-//W3C//DTD SVG 1.1//EN" '
249+ '"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"' ),
250+ 'html PUBLIC "-//IETF//DTD HTML 2.0//EN"' ,
251+ 'html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"' ]
252+ for dtd in dtds :
253+ self ._run_check ("<!DOCTYPE %s>" % dtd ,
254+ [('decl' , 'DOCTYPE ' + dtd )])
255+
250256 def test_declaration_junk_chars (self ):
251257 self ._parse_error ("<!DOCTYPE foo $ >" )
252258
@@ -384,8 +390,7 @@ def test_starttag_junk_chars(self):
384390 self ._run_check ("<a foo='>" , [('data' , "<a foo='>" )])
385391
386392 def test_declaration_junk_chars (self ):
387- # XXX this is wrong
388- self ._run_check ("<!DOCTYPE foo $ >" , [('comment' , 'DOCTYPE foo $ ' )])
393+ self ._run_check ("<!DOCTYPE foo $ >" , [('decl' , 'DOCTYPE foo $ ' )])
389394
390395 def test_illegal_declarations (self ):
391396 # XXX this might be wrong
@@ -510,11 +515,14 @@ def test_broken_comments(self):
510515 html = ('<! not really a comment >'
511516 '<! not a comment either -->'
512517 '<! -- close enough -->'
518+ '<!><!<-- this was an empty comment>'
513519 '<!!! another bogus comment !!!>' )
514520 expected = [
515521 ('comment' , ' not really a comment ' ),
516522 ('comment' , ' not a comment either --' ),
517523 ('comment' , ' -- close enough --' ),
524+ ('comment' , '' ),
525+ ('comment' , '<-- this was an empty comment' ),
518526 ('comment' , '!! another bogus comment !!!' ),
519527 ]
520528 self ._run_check (html , expected )
0 commit comments