Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit af96a74

Browse files
committed
Fix several treewalker issues including the lack of end tag tokens in the elementtree treewalker
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40991
1 parent a83fbe4 commit af96a74

File tree

4 files changed

+37
-19
lines changed

4 files changed

+37
-19
lines changed

src/html5lib/treewalkers/_base.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ def __iter__(self):
103103
details = self.getNodeDetails(currentNode)
104104
type, details = details[0], details[1:]
105105
hasChildren = False
106+
endTag = None
106107

107108
if type == DOCTYPE:
108109
yield self.doctype(*details)
@@ -118,6 +119,7 @@ def __iter__(self):
118119
yield token
119120
hasChildren = False
120121
else:
122+
endTag = name
121123
yield self.startTag(name, attributes)
122124

123125
elif type == COMMENT:

src/html5lib/treewalkers/etree.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -78,19 +78,20 @@ def getFirstChild(self, node):
7878
return (node, 0, parents)
7979

8080
def getNextSibling(self, node):
81-
assert isinstance(node, tuple), "Node is not a tuple: " + str(node)
82-
83-
elt, key, parents = node
84-
if key == "text":
85-
key = -1
86-
elif key == "tail":
87-
elt, key = parents.pop()
81+
if isinstance(node, tuple):
82+
elt, key, parents = node
83+
if key == "text":
84+
key = -1
85+
elif key == "tail":
86+
elt, key = parents.pop()
87+
else:
88+
# Look for "tail" of the "revisited" node
89+
child = elt[key]
90+
if child.tail:
91+
parents.append((elt, key))
92+
return (child, "tail", parents)
8893
else:
89-
# Look for "tail" of the "revisited" node
90-
child = elt[key]
91-
if child.tail:
92-
parents.append((elt, key))
93-
return (child, "tail", parents)
94+
return None
9495

9596
# case where key was "text" or "tail", or elt[key] had a tail
9697
key += 1
@@ -106,7 +107,6 @@ def getParentNode(self, node):
106107
elt, key = parents.pop()
107108
return elt, key, parents
108109
else:
109-
# HACK: We could return ``elt`` but None will stop the algorithm the same way
110-
return None
110+
return elt
111111

112112
return locals()

src/html5lib/treewalkers/genshistream.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from genshi.core import START, END, XML_DECL, DOCTYPE, TEXT, \
1+
from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT, \
22
START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT
33
from genshi.output import NamespaceFlattener
44

@@ -59,7 +59,7 @@ def tokens(self, event, next):
5959
elif kind == DOCTYPE:
6060
yield self.doctype(*data)
6161

62-
elif kind in (XML_DECL, DOCTYPE, START_NS, END_NS, \
62+
elif kind in (XML_NAMESPACE, DOCTYPE, START_NS, END_NS, \
6363
START_CDATA, END_CDATA, PI):
6464
pass
6565

tests/test_treewalkers.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -208,19 +208,35 @@ def runTest(self, innerHTML, input, expected, errors, treeClass):
208208
document = p.parse(StringIO.StringIO(input))
209209
document = treeClass.get("adapter", lambda x: x)(document)
210210
try:
211-
output = convertTokens(LintFilter(treeClass["walker"](document)))
211+
output = convertTokens(treeClass["walker"](document))
212212
output = attrlist.sub(sortattrs, output)
213213
expected = attrlist.sub(sortattrs, convertExpected(expected))
214214
self.assertEquals(expected, output, "\n".join([
215215
"", "Input:", input,
216216
"", "Expected:", expected,
217217
"", "Recieved:", output
218218
]))
219-
except LintError, le:
220-
self.fail(input + "\n" + le.message)
221219
except NotImplementedError:
222220
pass # Amnesty for those that confess...
223221

222+
class TokenTestCase(unittest.TestCase):
223+
def test_all_tokens(self):
224+
expected = [
225+
{'data': [], 'type': 'StartTag', 'name': u'html'},
226+
{'data': [], 'type': 'StartTag', 'name': u'head'},
227+
{'data': [], 'type': 'EndTag', 'name': u'head'},
228+
{'data': [], 'type': 'StartTag', 'name': u'body'},
229+
{'data': [], 'type': 'EndTag', 'name': u'body'},
230+
{'data': [], 'type': 'EndTag', 'name': u'html'}]
231+
for treeName, treeCls in treeTypes.iteritems():
232+
p = html5parser.HTMLParser(tree = treeCls["builder"])
233+
document = p.parse("<html></html>")
234+
document = treeCls.get("adapter", lambda x: x)(document)
235+
output = treeCls["walker"](document)
236+
for expectedToken, outputToken in zip(expected, output):
237+
self.assertEquals(expectedToken, outputToken)
238+
239+
224240
def buildTestSuite():
225241
sys.stdout.write('Testing tree walkers '+ " ".join(treeTypes.keys()) + "\n")
226242

0 commit comments

Comments
 (0)