|
6 | 6 | import pyexpat |
7 | 7 | from xml.parsers import expat |
8 | 8 |
|
9 | | -from test_support import sortdict |
| 9 | +from test_support import sortdict, TestFailed |
10 | 10 |
|
11 | 11 | class Outputter: |
12 | 12 | def StartElementHandler(self, name, attrs): |
@@ -218,3 +218,96 @@ def collector(name, *args): |
218 | 218 | print "(it didn't)" |
219 | 219 | print "L =", `L` |
220 | 220 | break |
| 221 | + |
| 222 | +# Tests of the buffer_text attribute. |
| 223 | +import sys |
| 224 | + |
class TextCollector:
    """Record parser callbacks as a flat list of strings in `self.stuff`.

    Character data is appended verbatim; start tags, end tags and comments
    are appended as "<name>", "</name>" and "<!--data-->" respectively.
    Only the handlers actually installed on the parser contribute entries.
    """

    def __init__(self, parser):
        # Keep the parser this collector is attached to, so that
        # StartElementHandler toggles buffering on *that* parser instead of
        # on whatever module-level name `parser` happens to be bound to.
        self.parser = parser
        self.stuff = []

    def check(self, expected, label):
        """Fail through require() unless the recorded events equal `expected`.

        `label` becomes the first line of the failure message, followed by
        the repr of what was recorded and of what was expected.
        """
        require(self.stuff == expected,
                "%s\nstuff = %s\nexpected = %s"
                % (label, repr(self.stuff), repr(map(unicode, expected))))

    def CharacterDataHandler(self, text):
        """Record one chunk of character data exactly as delivered."""
        self.stuff.append(text)

    def StartElementHandler(self, name, attrs):
        """Record a start tag and honour its optional buffer-text attribute.

        buffer-text="yes" turns text buffering on for the attached parser,
        buffer-text="no" turns it off; any other value (or no attribute)
        leaves the current setting alone.
        """
        self.stuff.append("<%s>" % name)
        bt = attrs.get("buffer-text")
        if bt == "yes":
            self.parser.buffer_text = 1
        elif bt == "no":
            self.parser.buffer_text = 0

    def EndElementHandler(self, name):
        """Record an end tag."""
        self.stuff.append("</%s>" % name)

    def CommentHandler(self, data):
        """Record a comment, re-wrapped in its delimiters."""
        self.stuff.append("<!--%s-->" % data)
| 250 | + |
def require(cond, label):
    """Raise TestFailed(label) when `cond` is false; otherwise do nothing."""
    # similar to confirm(), but no extraneous output
    if cond:
        return
    raise TestFailed(label)
| 255 | + |
def setup(handlers=()):
    """Create a parser with text buffering enabled and a collector attached.

    `handlers` is an iterable of handler attribute names (for example
    "StartElementHandler"); each named TextCollector method is installed on
    the parser under the same name.  CharacterDataHandler is always
    installed.  The default is an immutable empty tuple rather than a
    shared mutable list.

    Returns the (parser, collector) pair.  Fails through require() if a
    freshly created parser already reports buffer_text as enabled.
    """
    parser = expat.ParserCreate()
    require(not parser.buffer_text,
            "buffer_text not disabled by default")
    parser.buffer_text = 1
    handler = TextCollector(parser)
    parser.CharacterDataHandler = handler.CharacterDataHandler
    for name in handlers:
        setattr(parser, name, getattr(handler, name))
    return parser, handler
| 266 | + |
# Each case below rebinds the module-level names `parser` and `handler`,
# feeds one complete document to the parser, and compares the events the
# collector recorded against the expected list.

# With only the character data handler installed, all text in the document
# is delivered as a single chunk.
parser, handler = setup()
require(parser.buffer_text,
        "text buffering either not acknowledged or not enabled")
parser.Parse("<a>1<b/>2<c/>3</a>", 1)
handler.check(["123"],
              "buffered text not properly collapsed")

# XXX This test exposes more detail of Expat's text chunking than we
# XXX like, but it tests what we need to concisely.
# Buffering is switched off by <b> and back on by <c> through the
# buffer-text attribute handled in StartElementHandler.
parser, handler = setup(["StartElementHandler"])
parser.Parse("<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", 1)
handler.check(["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"],
              "buffering control not reacting as expected")

# The angle brackets in the text must be written as entity references: a
# literal "<2>" is not well-formed XML and would make Parse() fail before
# any text could be compared.
parser, handler = setup()
parser.Parse("<a>1<b/>&lt;2&gt;<c/> \n 3</a>", 1)
handler.check(["1<2> \n 3"],
              "buffered text not properly collapsed")

# A start-element callback splits the text at each start tag.
parser, handler = setup(["StartElementHandler"])
parser.Parse("<a>1<b/>2<c/>3</a>", 1)
handler.check(["<a>", "1", "<b>", "2", "<c>", "3"],
              "buffered text not properly split")

# With the character data handler removed, only tag events are recorded.
parser, handler = setup(["StartElementHandler", "EndElementHandler"])
parser.CharacterDataHandler = None
parser.Parse("<a>1<b/>2<c/>3</a>", 1)
handler.check(["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"],
              "huh?")

# Start and end callbacks both split the text.
parser, handler = setup(["StartElementHandler", "EndElementHandler"])
parser.Parse("<a>1<b></b>2<c/>3</a>", 1)
handler.check(["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"],
              "huh?")

# A comment handler is installed but the document has no comments; the
# whitespace after the root element does not appear in the expected list.
parser, handler = setup(["CommentHandler", "EndElementHandler",
                         "StartElementHandler"])
parser.Parse("<a>1<b/>2<c></c>345</a> ", 1)
handler.check(["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
              "buffered text not properly split")

# Comments split the text as well.
parser, handler = setup(["CommentHandler", "EndElementHandler",
                         "StartElementHandler"])
parser.Parse("<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", 1)
handler.check(["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
               "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
              "buffered text not properly split")
0 commit comments