|
20 | 20 | import xml.dom.esis_builder |
21 | 21 |
|
22 | 22 |
|
# When true, the paragraph-fixup functions write progress messages to
# sys.stderr.
DEBUG_PARA_FIXER = 0
| 24 | + |
| 25 | + |
23 | 26 | # Workaround to deal with invalid documents (multiple root elements). This |
24 | 27 | # does not indicate a bug in the DOM implementation. |
25 | 28 | # |
@@ -323,12 +326,157 @@ def cleanup_synopses(doc): |
323 | 326 | create_module_info(doc, node) |
324 | 327 |
|
325 | 328 |
|
# Sectioning elements whose loose content gets wrapped in <para> elements.
FIXUP_PARA_ELEMENTS = (
    "chapter",
    "section",
    "subsection",
    "subsubsection",
    "paragraph",
    "subparagraph",
)

# Elements that already stand at paragraph level: they end a paragraph that
# is being collected and are never placed inside a new <para>.
PARA_LEVEL_ELEMENTS = (
    "moduleinfo",
    "title",
    "opcodedesc",
    "verbatim",
    "funcdesc",
    "methoddesc",
    "excdesc",
    "datadesc",
    "funcdescni",
    "methoddescni",
    "excdescni",
    "datadescni",
    "tableii",
    "tableiii",
    "tableiv",
    "localmoduletable",
    "sectionauthor",
    # include <para>, so we can just do it again to get subsequent paras:
    "para",
)

# Elements that are stepped over while looking for the start of a paragraph.
PARA_LEVEL_PRECEEDERS = (
    "index",
    "indexii",
    "indexiii",
    "indexiv",
    "stindex",
    "obindex",
    "COMMENT",
    "label",
)
| 348 | + |
def fixup_paras(doc):
    """Wrap loose paragraph content of `doc` in <para> elements.

    Every direct child of `doc` that is an element named in
    FIXUP_PARA_ELEMENTS is handed to fixup_paras_helper().  Afterwards every
    <description> element found below any element child of `doc` is handed
    to fixup_paras_helper() as well.

    The document is modified in place; nothing is returned.
    """
    # First pass: the sectioning elements that are direct children of doc.
    for child in doc.childNodes:
        if child.nodeType == xml.dom.core.ELEMENT \
           and child.tagName in FIXUP_PARA_ELEMENTS:
            fixup_paras_helper(doc, child)
    # Second pass: <description> elements.  The lookup is made on each
    # element child in turn, so it neither depends on a loop variable left
    # over from the first pass nor fails when doc has no children or when a
    # child is not an element (only elements offer getElementsByTagName()).
    for child in doc.childNodes:
        if child.nodeType != xml.dom.core.ELEMENT:
            continue
        for description in child.getElementsByTagName("description"):
            if DEBUG_PARA_FIXER:
                sys.stderr.write("-- Fixing up <description> element...\n")
            fixup_paras_helper(doc, description)
| 359 | + |
| 360 | + |
def fixup_paras_helper(doc, container):
    """Rewrite the loose content of `container` as <para> elements.

    The children of `container` are scanned up to the first nested
    sectioning element (one named in FIXUP_PARA_ELEMENTS).  Each pass of
    the loop locates the next run of paragraph material in that range and
    asks build_para() to wrap it; the loop ends when a pass finds nothing
    left to wrap.  Finally every nested sectioning element that is a direct
    child of `container` is processed recursively, not just the first one.

    `doc` is used to create the new elements; `container` is modified in
    place.  Nothing is returned.
    """
    # document is already normalized
    SKIP_ELEMENTS = PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS
    while 1:
        children = container.childNodes
        start = 0           # index of the first candidate paragraph node
        start_fixed = 0     # true once paragraph material has been seen
        i = 0               # index one past the last node examined
        for child in children:
            if child.nodeType == xml.dom.core.ELEMENT:
                if child.tagName in FIXUP_PARA_ELEMENTS:
                    # A nested section ends the paragraph material; it is
                    # handled after the loop.
                    break
                elif child.tagName in SKIP_ELEMENTS:
                    # Leading paragraph-level elements push the start
                    # forward until real paragraph material shows up.
                    if not start_fixed:
                        start = i + 1
                else:
                    start_fixed = 1
            elif child.nodeType == xml.dom.core.TEXT \
                 and string.strip(child.data):
                # Non-blank text is paragraph material too.
                start_fixed = 1
            i = i + 1
        if DEBUG_PARA_FIXER:
            sys.stderr.write("fixup_paras_helper() called on <%s>; %d, %d\n"
                             % (container.tagName, start, i))
        if i <= start:
            break
        # The [start:i] children should be rewritten as <para> elements;
        # first step over leading nodes that do not belong in a paragraph.
        nstart, i = skip_leading_nodes(container.childNodes, start, i)
        if i <= nstart:
            break
        # Wrap one paragraph, then rescan: the new <para> is itself in
        # SKIP_ELEMENTS, so the next pass starts after it.
        build_para(doc, container, nstart, i)
    # Recurse into every nested sectioning element, so that sections which
    # follow the first one are fixed up as well.
    for child in container.childNodes:
        if child.nodeType == xml.dom.core.ELEMENT \
           and child.tagName in FIXUP_PARA_ELEMENTS:
            fixup_paras_helper(doc, child)
| 394 | + |
| 395 | + |
def build_para(doc, parent, start, i):
    """Move a run of `parent`'s children into a new <para> element.

    Children are collected from index `start` up to (not including) index
    `i`, stopping early at an element named in PARA_LEVEL_ELEMENTS or
    FIXUP_PARA_ELEMENTS, or at a text node containing a blank line
    ("\\n\\n").  Text nodes are split so that the blank line and any
    trailing white space stay outside the paragraph.  The collected nodes
    are removed from `parent`, added to a <para> created with
    doc.createElement(), and the <para> is put where the run started.

    `parent` is modified in place; nothing is returned.
    """
    children = parent.childNodes
    # collect all children until \n\n+ is found in a text node or a
    # PARA_LEVEL_ELEMENT is found.
    after = start + 1
    BREAK_ELEMENTS = PARA_LEVEL_ELEMENTS + FIXUP_PARA_ELEMENTS
    for j in range(start, i):
        after = j + 1
        child = children[j]
        nodeType = child.nodeType
        if nodeType == xml.dom.core.ELEMENT:
            if child.tagName in BREAK_ELEMENTS:
                # this element stays outside the paragraph
                after = j
                break
        elif nodeType == xml.dom.core.TEXT:
            pos = string.find(child.data, "\n\n")
            if pos == 0:
                # the text starts with a blank line: it stays outside
                after = j
                break
            if pos >= 1:
                # keep the text before the blank line, split off the rest
                child.splitText(pos)
                break
    # Only look at the last collected node when something was collected;
    # otherwise after - 1 would index a node outside the run.
    if after > start and children[after - 1].nodeType == xml.dom.core.TEXT:
        # we may need to split off trailing white space:
        child = children[after - 1]
        data = child.data
        stripped = string.rstrip(data)
        if stripped != data:
            child.splitText(len(stripped))
    # re-fetch the child list now that text nodes may have been split
    children = parent.childNodes
    para = doc.createElement("para")
    prev = None
    # Move the nodes last-to-first, inserting each one before the node
    # moved previously, so they keep their order inside the <para>.
    j = after - 1
    while j >= start:
        node = children[j]
        parent.removeChild(node)
        para.insertBefore(node, prev)
        prev = node
        j = j - 1
    # Put the <para> where the run started.  Whatever followed the run now
    # sits at index `start`; insert before it so the paragraph never ends
    # up behind a following sibling.  Append only if nothing follows.
    remaining = parent.childNodes
    if len(remaining) > start:
        parent.insertBefore(para, remaining[start])
    else:
        parent.appendChild(para)
| 442 | + |
| 443 | + |
def skip_leading_nodes(children, start, i):
    """Advance `start` past nodes that cannot begin a paragraph.

    Comments, all-whitespace text nodes and elements named in
    PARA_LEVEL_ELEMENTS or PARA_LEVEL_PRECEEDERS are stepped over.  A text
    node with leading white space followed by other text is split in two
    with splitText(); in that case both indexes are returned one higher
    than they were.

    Returns a (start, i) tuple; `i` is first clipped to len(children).
    """
    skippable = PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS
    i = min(i, len(children))
    while i > start:
        try:
            node = children[start]
        except IndexError:
            sys.stderr.write(
                "skip_leading_nodes() failed at index %d\n" % start)
            raise
        kind = node.nodeType
        if kind == xml.dom.core.TEXT:
            text = node.data
            stripped = string.lstrip(text)
            if stripped:
                if text == stripped:
                    # starts with real text: the paragraph begins here
                    break
                # break into two nodes: whitespace and non-whitespace
                node.splitText(len(text) - len(stripped))
                return start + 1, i + 1
            # all whitespace, just skip
        elif kind == xml.dom.core.ELEMENT:
            if node.tagName not in skippable:
                break
        elif kind != xml.dom.core.COMMENT:
            # any other node type ends the skipping
            break
        start = start + 1
    return start, i
328 | 476 |
|
329 | 477 |
|
330 | 478 | _token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$") |
331 | | - |
| 479 | + |
332 | 480 | def write_esis(doc, ofp, knownempty): |
333 | 481 | for node in doc.childNodes: |
334 | 482 | nodeType = node.nodeType |
|
0 commit comments