Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 865e9ff

Browse files
committed
Added a few more passes through the document fragment. Not actually
very interesting.
1 parent 96e4a06 commit 865e9ff

1 file changed

Lines changed: 126 additions & 11 deletions

File tree

Doc/tools/sgmlconv/docfixer.py

Lines changed: 126 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ class ConversionError(Exception):
2222
pass
2323

2424

25+
PARA_ELEMENT = "para"
26+
2527
DEBUG_PARA_FIXER = 0
2628

2729
if DEBUG_PARA_FIXER:
@@ -77,7 +79,17 @@ def find_all_elements(doc, gi):
7779
nodes.append(child)
7880
for node in child.getElementsByTagName(gi):
7981
nodes.append(node)
80-
return nodes
82+
return nodes
83+
84+
def find_all_elements_from_set(doc, gi_set, nodes=None):
    """Collect every element at or below `doc` whose tag name is in `gi_set`.

    The tree is walked in document (pre-order) sequence; `doc` itself is
    included when it is an element with a matching tag name.  Matches are
    appended to `nodes` when a list is passed in (the recursive calls rely
    on this); otherwise a new list is created.  The list is returned.
    """
    if nodes is None:
        found = []
    else:
        found = nodes
    if doc.nodeType == ELEMENT:
        if doc.tagName in gi_set:
            found.append(doc)
    for child in doc.childNodes:
        # Only elements can match or contain matches.
        if child.nodeType != ELEMENT:
            continue
        find_all_elements_from_set(child, gi_set, found)
    return found
8193

8294

8395
def simplify(doc, fragment):
@@ -108,7 +120,7 @@ def simplify(doc, fragment):
108120
docelem.insertBefore(text, docelem.firstChild)
109121
docelem.insertBefore(node, text)
110122
docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
111-
while fragment.firstChild.nodeType == TEXT:
123+
while fragment.firstChild and fragment.firstChild.nodeType == TEXT:
112124
fragment.removeChild(fragment.firstChild)
113125

114126

@@ -291,8 +303,8 @@ def handle_appendix(doc, fragment):
291303
docelem.appendChild(doc.createTextNode("\n"))
292304

293305

294-
def handle_labels(doc):
295-
for label in find_all_elements(doc, "label"):
306+
def handle_labels(doc, fragment):
307+
for label in find_all_elements(fragment, "label"):
296308
id = label.getAttribute("id")
297309
if not id:
298310
continue
@@ -303,6 +315,11 @@ def handle_labels(doc):
303315
parent.setAttribute("id", id)
304316
# now, remove <label id="..."/> from parent:
305317
parent.removeChild(label)
318+
if parent.tagName == "title":
319+
parent.normalize()
320+
children = parent.childNodes
321+
if children[-1].nodeType == TEXT:
322+
children[-1].data = string.rstrip(children[-1].data)
306323

307324

308325
def fixup_trailing_whitespace(doc, wsmap):
@@ -587,25 +604,27 @@ def move_elements_by_name(doc, source, dest, name, sep=None):
587604
# Tuple of element tag names.  Judging by the name, these are the containers
# that get searched for paragraph content -- the code that reads this tuple
# is outside this excerpt, so confirm there.
# NOTE(review): the two "paragraph" lines below are the diff's removed/added
# pair; only the one that includes "back-matter" belongs to the new revision.
RECURSE_INTO_PARA_CONTAINERS = (
588605
"chapter", "abstract", "enumerate",
589606
"section", "subsection", "subsubsection",
590-
"paragraph", "subparagraph",
607+
"paragraph", "subparagraph", "back-matter",
591608
"howto", "manual",
592609
)
593610

594611
# Tuple of element tag names; presumably the elements that sit at paragraph
# level -- the code that consults this tuple is outside this excerpt, so
# confirm there.  This is the post-commit content of the table, with the
# misspelled "membderdescni" corrected to "memberdescni" so that it lines
# up with the other *descni names.
PARA_LEVEL_ELEMENTS = (
    "moduleinfo", "title", "verbatim", "enumerate", "item",
    "interpreter-session", "back-matter", "interactive-session",
    "opcodedesc", "classdesc", "datadesc",
    "funcdesc", "methoddesc", "excdesc", "memberdesc", "memberdescni",
    "funcdescni", "methoddescni", "excdescni",
    "tableii", "tableiii", "tableiv", "localmoduletable",
    "sectionauthor", "seealso",
    # include <para>, so we can just do it again to get subsequent paras:
    PARA_ELEMENT,
    )
605622

606623
# Tuple of element tag names.  Judging by the name, these are elements that
# may precede paragraph-level content -- the code that reads this tuple is
# outside this excerpt, so confirm there.
# The last two rows ("versionadded" ... "moduleauthor") are additions made
# by this commit.
PARA_LEVEL_PRECEEDERS = (
607624
"index", "indexii", "indexiii", "indexiv", "setindexsubitem",
608625
"stindex", "obindex", "COMMENT", "label", "input", "title",
626+
"versionadded", "versionchanged", "declaremodule", "modulesynopsis",
627+
"moduleauthor",
609628
)
610629

611630

@@ -680,7 +699,7 @@ def build_para(doc, parent, start, i):
680699
if string.rstrip(data) != data:
681700
have_last = 0
682701
child.splitText(len(string.rstrip(data)))
683-
para = doc.createElement("para")
702+
para = doc.createElement(PARA_ELEMENT)
684703
prev = None
685704
indexes = range(start, after)
686705
indexes.reverse()
@@ -789,6 +808,98 @@ def fixup_verbatims(doc):
789808
verbatim._node.name = "interactive-session"
790809

791810

811+
def add_node_ids(fragment, counter=0):
    """Stamp `fragment` and everything beneath it with distinct integer ids.

    Each id is stored as the `node_id` attribute of the node's underlying
    `_node` object.  `fragment` itself receives `counter`; descendants
    receive larger values (some numbers are skipped, but none is reused).
    Returns one more than the last value consumed inside this subtree.
    """
    fragment._node.node_id = counter
    next_id = counter
    for child in fragment.childNodes:
        next_id = next_id + 1
        if child.nodeType != ELEMENT:
            child._node.node_id = next_id
        else:
            # Elements number themselves and their subtree recursively.
            next_id = add_node_ids(child, next_id)
    return next_id + 1
820+
821+
822+
# Tag names of the module-index marker elements that fixup_refmodindexes()
# and fixup_refmodindexes_chunk() search for.
REFMODINDEX_ELEMENTS = (
    'refmodindex',
    'refbimodindex',
    'refexmodindex',
    'refstmodindex',
    )
824+
825+
def fixup_refmodindexes(fragment):
    """Run fixup_refmodindexes_chunk() once per parent of a <ref*modindex>.

    Parents are de-duplicated by the `node_id` stored on their `_node`
    object, so the ids assigned by add_node_ids() must already be in place.
    """
    # Locate <ref*modindex>...</> co-located with <module>...</>, and
    # remove the <ref*modindex>, replacing it with index=index on the
    # <module> element.
    parents = {}
    for node in find_all_elements_from_set(fragment, REFMODINDEX_ELEMENTS):
        parent = node.parentNode
        parents[parent._node.node_id] = parent
    # An explicit loop rather than map(): the calls are made purely for
    # their side effects, and a lazy map() would never perform them.
    for parent in parents.values():
        fixup_refmodindexes_chunk(parent)
836+
837+
838+
def fixup_refmodindexes_chunk(container):
    """Fold <ref*modindex> entries in `container` into matching <module>s.

    For every childless <ref*modindex> element found at or below
    `container`, each <module> element whose sole text child equals the
    entry's "name" attribute gets index="index".  Entries that matched at
    least one <module> are then removed from the tree.  Entries that have
    children are reported on stderr and left alone.
    """
    # node is probably a <para>; let's see how often it isn't:
    if container.tagName != PARA_ELEMENT:
        sys.stderr.write("--- fixup_refmodindexes_chunk(%s)\n" % container)
    module_entries = find_all_elements(container, "module")
    if not module_entries:
        return
    index_entries = find_all_elements_from_set(container, REFMODINDEX_ELEMENTS)
    removes = []
    for entry in index_entries:
        if len(entry.childNodes) != 0:
            sys.stderr.write(
                "--- unexpected number of children for %s node:\n"
                % entry.tagName)
            sys.stderr.write(entry.toxml() + "\n")
            continue
        found = 0
        module_name = entry.getAttribute("name")
        for node in module_entries:
            children = node.childNodes
            # Only a lone text child carries a comparable module name;
            # anything else has no usable .data attribute.
            if len(children) != 1 or children[0].nodeType != TEXT:
                continue
            if children[0].data == module_name:
                found = 1
                node.setAttribute("index", "index")
        if found:
            removes.append(entry)
    for node in removes:
        # The search above is recursive, so an entry is not necessarily a
        # direct child of `container`; detach it from its real parent.
        node.parentNode.removeChild(node)
868+
869+
870+
def fixup_bifuncindexes(fragment):
    """Run fixup_bifuncindexes_chunk() once per parent of a <bifuncindex>.

    Parents are de-duplicated by the `node_id` stored on their `_node`
    object, so the ids assigned by add_node_ids() must already be in place.
    """
    parents = {}
    for node in find_all_elements(fragment, 'bifuncindex'):
        parent = node.parentNode
        parents[parent._node.node_id] = parent
    # An explicit loop rather than map(): the calls are made purely for
    # their side effects, and a lazy map() would never perform them.
    for parent in parents.values():
        fixup_bifuncindexes_chunk(parent)
878+
879+
880+
def fixup_bifuncindexes_chunk(container):
    """Fold <bifuncindex> entries in `container` into matching <function>s.

    A <function> element matches an entry when its first child is text of
    the form "NAME()" and NAME equals the entry's "name" attribute.  Every
    matching <function> gets index="index" and module="__builtin__"; an
    entry with at least one match is then removed from the tree.
    """
    removes = []
    entries = find_all_elements(container, "bifuncindex")
    function_entries = find_all_elements(container, "function")
    for entry in entries:
        function_name = entry.getAttribute("name")
        found = 0
        for func_entry in function_entries:
            children = func_entry.childNodes
            # An empty <function>, or one that starts with markup, has no
            # text to compare against.
            if len(children) == 0 or children[0].nodeType != TEXT:
                continue
            text = children[0].data
            if text[-2:] != "()":
                continue
            if text[:-2] == function_name:
                func_entry.setAttribute("index", "index")
                func_entry.setAttribute("module", "__builtin__")
                found = 1
        if found:
            removes.append(entry)
    for entry in removes:
        # find_all_elements() may return nested elements, so detach each
        # entry from its real parent rather than assuming `container`.
        entry.parentNode.removeChild(entry)
901+
902+
792903
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
793904

794905
def write_esis(doc, ofp, knownempty):
@@ -798,7 +909,8 @@ def write_esis(doc, ofp, knownempty):
798909
gi = node.tagName
799910
if knownempty(gi):
800911
if node.hasChildNodes():
801-
raise ValueError, "declared-empty node has children"
912+
raise ValueError, \
913+
"declared-empty node <%s> has children" % gi
802914
ofp.write("e\n")
803915
for k, v in node.attributes.items():
804916
value = v.value
@@ -823,7 +935,7 @@ def convert(ifp, ofp):
823935
fragment = p.fragment
824936
normalize(fragment)
825937
simplify(doc, fragment)
826-
handle_labels(fragment)
938+
handle_labels(doc, fragment)
827939
handle_appendix(doc, fragment)
828940
fixup_trailing_whitespace(doc, {
829941
"abstract": "\n",
@@ -855,6 +967,9 @@ def convert(ifp, ofp):
855967
fixup_table_structures(doc, fragment)
856968
fixup_rfc_references(doc, fragment)
857969
fixup_signatures(doc, fragment)
970+
add_node_ids(fragment)
971+
fixup_refmodindexes(fragment)
972+
fixup_bifuncindexes(fragment)
858973
#
859974
d = {}
860975
for gi in p.get_empties():

0 commit comments

Comments
 (0)