From f522a771246e41c67d943044dbef829581ad7973 Mon Sep 17 00:00:00 2001 From: Jeremy Baker Date: Thu, 12 May 2016 22:36:36 -0700 Subject: [PATCH 01/23] Add a plugins section to the documentation Per #195 --- docs/index.rst | 1 + docs/plugins.rst | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 docs/plugins.rst diff --git a/docs/index.rst b/docs/index.rst index c304e103..05a0f761 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -11,4 +11,5 @@ PyDocX export_mixins enumerated_list_detection development + plugins release_notes diff --git a/docs/plugins.rst b/docs/plugins.rst new file mode 100644 index 00000000..40069e74 --- /dev/null +++ b/docs/plugins.rst @@ -0,0 +1,36 @@ +####### +Plugins +####### + +You may find yourself needing +a feature in PyDocX that doesn't exist +in the core library. + +If it's something that should exist, the +PyDocX project is always open to new +contributions. Details of how to contibute +can be found in :doc:`/development`. + +For things that don't fit in the core +library, it's easy to build a plugin +based on the :doc:`Extending PyDocX ` and +:doc:`Export Mixins ` sections. + +If you do build a plugin, edit this +documentation and add it below so that +other developers can find it. + +----------------- +Available Plugins +----------------- + +.. list-table:: + :widths: 20 80 + :header-rows: 1 + + * - Plugin + - Description + * - `pydocx-resize-images `_ + - Resizes large images to the dimensions they are in the docx file + * - `pydocx-s3-images `_ + - Uploads images to S3 instead of returning Data URIs From 7064c534a01dbdd9174265c6835dfa6e6897f85f Mon Sep 17 00:00:00 2001 From: iury Date: Wed, 18 May 2016 13:19:01 -0300 Subject: [PATCH 02/23] add run properties font interpretation --- pydocx/openxml/wordprocessing/__init__.py | 2 ++ pydocx/openxml/wordprocessing/rfonts.py | 25 +++++++++++++++++++ .../openxml/wordprocessing/run_properties.py | 2 ++ .../wordprocessing/test_run_properties.py | 10 ++++++++ 4 files changed, 39 insertions(+) create mode 100644 pydocx/openxml/wordprocessing/rfonts.py diff --git a/pydocx/openxml/wordprocessing/__init__.py b/pydocx/openxml/wordprocessing/__init__.py index 2430064d..23c9934f 100644 --- a/pydocx/openxml/wordprocessing/__init__.py +++ b/pydocx/openxml/wordprocessing/__init__.py @@ -26,6 +26,7 @@ from pydocx.openxml.wordprocessing.picture import Picture from pydocx.openxml.wordprocessing.run import Run from pydocx.openxml.wordprocessing.run_properties import RunProperties # noqa +from pydocx.openxml.wordprocessing.rfonts import RFonts from pydocx.openxml.wordprocessing.sdt_block import SdtBlock from pydocx.openxml.wordprocessing.sdt_content_block import SdtContentBlock from pydocx.openxml.wordprocessing.sdt_content_run import SdtContentRun @@ -70,6 +71,7 @@ 'Picture', 'Run', 'RunProperties', + 'RFonts', 'SdtBlock', 'SdtContentBlock', 'SdtContentRun', diff --git a/pydocx/openxml/wordprocessing/rfonts.py b/pydocx/openxml/wordprocessing/rfonts.py new file mode 100644 index 00000000..ea95216e --- /dev/null +++ b/pydocx/openxml/wordprocessing/rfonts.py @@ -0,0 +1,25 @@ +# coding: utf-8 +from __future__ import ( + absolute_import, + print_function, + unicode_literals, +) + +from pydocx.models import XmlModel, XmlAttribute + + +class RFonts(XmlModel): + XML_TAG = 'rFonts' + + hint = XmlAttribute(name='hint') + ascii = XmlAttribute(name='ascii') + h_ansi = XmlAttribute(name='hAnsi') + east_asia = XmlAttribute(name='eastAsia') + cs = XmlAttribute(name='cs') + ascii_theme = XmlAttribute(name='asciiTheme') + h_ansi_theme = XmlAttribute(name='hAnsiTheme') + east_asia_theme = XmlAttribute(name='eastAsiaTheme') + cs_theme = XmlAttribute(name='cstheme') + + def is_symbol(self): + return self.h_ansi == 'Symbol' diff --git a/pydocx/openxml/wordprocessing/run_properties.py b/pydocx/openxml/wordprocessing/run_properties.py index 46867e65..63587a57 100644 --- a/pydocx/openxml/wordprocessing/run_properties.py +++ b/pydocx/openxml/wordprocessing/run_properties.py @@ -7,6 +7,7 @@ from pydocx.models import XmlModel, XmlChild from pydocx.types import OnOff, Underline +from pydocx.openxml.wordprocessing.rfonts import RFonts class RunProperties(XmlModel): @@ -26,6 +27,7 @@ class RunProperties(XmlModel): pos = XmlChild(name='position', attrname='val') sz = XmlChild(name='sz', attrname='val') clr = XmlChild(name='color', attrname='val') + r_fonts = XmlChild(type=RFonts) @property def color(self): diff --git a/tests/openxml/wordprocessing/test_run_properties.py b/tests/openxml/wordprocessing/test_run_properties.py index 3d59fe14..bd1209d7 100644 --- a/tests/openxml/wordprocessing/test_run_properties.py +++ b/tests/openxml/wordprocessing/test_run_properties.py @@ -16,6 +16,16 @@ def _load_styles_from_xml(self, xml): root = parse_xml_from_string(xml) return RunProperties.load(root) + def test_run_properties_with_symbol_font(self): + xml = b''' + + + + ''' + properties = self._load_styles_from_xml(xml) + + self.assertTrue(properties.r_fonts.is_symbol()) + def test_bold_on(self): xml = b''' From e17b1d3685997ad11390f393dd527f0822b24beb Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Fri, 29 Jul 2016 14:39:50 -0400 Subject: [PATCH 03/23] refs #220: Added tests illustrating the problem. --- .../test_paragraph_properties.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/openxml/wordprocessing/test_paragraph_properties.py b/tests/openxml/wordprocessing/test_paragraph_properties.py index 9617abe4..b72fff8d 100644 --- a/tests/openxml/wordprocessing/test_paragraph_properties.py +++ b/tests/openxml/wordprocessing/test_paragraph_properties.py @@ -105,3 +105,21 @@ def test_returns_left_minus_hanging_ignoring_first_line(self): ''' properties = self._load_from_xml(xml) self.assertEqual(properties.start_margin_position, 100) + + def test_allow_decimal_indentation_for_hanging(self): + xml = ''' + + + + ''' + properties = self._load_from_xml(xml) + self.assertEqual(properties.start_margin_position, 100) + + def test_allow_decimal_indentation_for_first_line(self): + xml = ''' + + + + ''' + properties = self._load_from_xml(xml) + self.assertEqual(properties.start_margin_position, 173) From 2e48c75c0e561f4b6a81dfe3b411e588ae4bff61 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Fri, 29 Jul 2016 14:40:04 -0400 Subject: [PATCH 04/23] refs #220: Fixed broken tests. --- pydocx/openxml/wordprocessing/paragraph_properties.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pydocx/openxml/wordprocessing/paragraph_properties.py b/pydocx/openxml/wordprocessing/paragraph_properties.py index d893bd7b..52f3ec25 100644 --- a/pydocx/openxml/wordprocessing/paragraph_properties.py +++ b/pydocx/openxml/wordprocessing/paragraph_properties.py @@ -35,11 +35,11 @@ def start_margin_position(self): # ignored. start_margin = 0 if self.indentation_left: - start_margin += int(self.indentation_left) + start_margin += int(float(self.indentation_left)) if self.indentation_hanging: - start_margin -= int(self.indentation_hanging) + start_margin -= int(float(self.indentation_hanging)) elif self.indentation_first_line: - start_margin += int(self.indentation_first_line) + start_margin += int(float(self.indentation_first_line)) if start_margin: return start_margin return 0 From a97feda1d64a5f60e8868099e8072c67822030ff Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Fri, 29 Jul 2016 14:40:56 -0400 Subject: [PATCH 05/23] refs #220: Update note. --- CHANGELOG.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 41bb117b..439b5be9 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,3 +1,7 @@ +**0.9.10** + +- Correctly handle margin positions with decimal points. + **0.9.9** - Rect elements now correctly handle image data From bd28dd610db3763fb90af963eef32e5355b264e5 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Fri, 29 Jul 2016 14:54:52 -0400 Subject: [PATCH 06/23] refs #220: make decimal points not 0 --- tests/openxml/wordprocessing/test_paragraph_properties.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/openxml/wordprocessing/test_paragraph_properties.py b/tests/openxml/wordprocessing/test_paragraph_properties.py index b72fff8d..6388afcd 100644 --- a/tests/openxml/wordprocessing/test_paragraph_properties.py +++ b/tests/openxml/wordprocessing/test_paragraph_properties.py @@ -109,7 +109,7 @@ def test_returns_left_minus_hanging_ignoring_first_line(self): def test_allow_decimal_indentation_for_hanging(self): xml = ''' - + ''' properties = self._load_from_xml(xml) @@ -118,7 +118,7 @@ def test_allow_decimal_indentation_for_hanging(self): def test_allow_decimal_indentation_for_first_line(self): xml = ''' - + ''' properties = self._load_from_xml(xml) From 0186b21b120c92285bc6ba79e45ab8c95984e038 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Fri, 29 Jul 2016 14:55:27 -0400 Subject: [PATCH 07/23] refs #220: Updated the update note. --- CHANGELOG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 439b5be9..ae66ccc9 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,6 @@ **0.9.10** -- Correctly handle margin positions with decimal points. +- No longer error when processing margin positions with decimal points. **0.9.9** From 3d01b3c7210f78a7c69409feac8c2cc8663d7de1 Mon Sep 17 00:00:00 2001 From: Jason Ward Date: Fri, 29 Jul 2016 16:31:17 -0400 Subject: [PATCH 08/23] Bumped to version 0.9.10 --- pydocx/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydocx/__init__.py b/pydocx/__init__.py index 4e5ebc69..d45f70ba 100644 --- a/pydocx/__init__.py +++ b/pydocx/__init__.py @@ -6,4 +6,4 @@ 'PyDocX', ] -__version__ = '0.9.9' +__version__ = '0.9.10' From 65a76f5289626c30162298e4bda577ea7bc9c24d Mon Sep 17 00:00:00 2001 From: Tarashish Mishra Date: Thu, 25 Aug 2016 15:13:06 +0530 Subject: [PATCH 09/23] Don't drop internal links --- pydocx/export/html.py | 10 +++++++++- pydocx/openxml/wordprocessing/__init__.py | 2 ++ pydocx/openxml/wordprocessing/bookmark.py | 14 ++++++++++++++ pydocx/openxml/wordprocessing/paragraph.py | 8 ++++++++ 4 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 pydocx/openxml/wordprocessing/bookmark.py diff --git a/pydocx/export/html.py b/pydocx/export/html.py index 40498a89..01d4a2dc 100644 --- a/pydocx/export/html.py +++ b/pydocx/export/html.py @@ -270,6 +270,11 @@ def get_heading_tag(self, paragraph): heading_style.name.lower(), self.default_heading_level, ) + if paragraph.bookmark_name: + attrs = { + 'id': paragraph.bookmark_name + } + return HtmlTag(tag, **attrs) return HtmlTag(tag) def export_paragraph(self, paragraph): @@ -507,7 +512,10 @@ def get_hyperlink_tag(self, target_uri): def export_hyperlink(self, hyperlink): results = super(PyDocXHTMLExporter, self).export_hyperlink(hyperlink) - tag = self.get_hyperlink_tag(target_uri=hyperlink.target_uri) + if hyperlink.target_uri: + tag = self.get_hyperlink_tag(target_uri=hyperlink.target_uri) + else: + tag = self.get_hyperlink_tag(target_uri='#' + hyperlink.anchor) if tag: results = tag.apply(results, allow_empty=False) diff --git a/pydocx/openxml/wordprocessing/__init__.py b/pydocx/openxml/wordprocessing/__init__.py index 515f64ca..4fce72a2 100644 --- a/pydocx/openxml/wordprocessing/__init__.py +++ b/pydocx/openxml/wordprocessing/__init__.py @@ -1,6 +1,7 @@ # coding: utf-8 from pydocx.openxml.wordprocessing.abstract_num import AbstractNum from pydocx.openxml.wordprocessing.body import Body +from pydocx.openxml.wordprocessing.bookmark import Bookmark from pydocx.openxml.wordprocessing.br import Break from pydocx.openxml.wordprocessing.deleted_run import DeletedRun from pydocx.openxml.wordprocessing.deleted_text import DeletedText @@ -47,6 +48,7 @@ __all__ = [ 'AbstractNum', 'Body', + 'Bookmark', 'Break', 'DeletedRun', 'DeletedText', diff --git a/pydocx/openxml/wordprocessing/bookmark.py b/pydocx/openxml/wordprocessing/bookmark.py new file mode 100644 index 00000000..1e7bf417 --- /dev/null +++ b/pydocx/openxml/wordprocessing/bookmark.py @@ -0,0 +1,14 @@ +# coding: utf-8 +from __future__ import ( + absolute_import, + print_function, + unicode_literals, +) + +from pydocx.models import XmlModel, XmlAttribute + + +class Bookmark(XmlModel): + XML_TAG = 'bookmarkStart' + + name = XmlAttribute(name='name') diff --git a/pydocx/openxml/wordprocessing/paragraph.py b/pydocx/openxml/wordprocessing/paragraph.py index af59dd7b..fe5443e3 100644 --- a/pydocx/openxml/wordprocessing/paragraph.py +++ b/pydocx/openxml/wordprocessing/paragraph.py @@ -16,6 +16,7 @@ from pydocx.openxml.wordprocessing.deleted_run import DeletedRun from pydocx.openxml.wordprocessing.sdt_run import SdtRun from pydocx.openxml.wordprocessing.simple_field import SimpleField +from pydocx.openxml.wordprocessing.bookmark import Bookmark class Paragraph(XmlModel): @@ -31,6 +32,7 @@ class Paragraph(XmlModel): DeletedRun, SdtRun, SimpleField, + Bookmark ) def __init__(self, **kwargs): @@ -121,6 +123,12 @@ def runs(self): if isinstance(p_child, Run): yield p_child + @property + def bookmark_name(self): + for p_child in self.children: + if isinstance(p_child, Bookmark): + return p_child.name + def get_text(self, tab_char=None): ''' Return a string of all of the contained Text nodes concatenated From ab1056ef042a4648ae51c88583f2a845145cdfa7 Mon Sep 17 00:00:00 2001 From: Tarashish Mishra Date: Thu, 25 Aug 2016 17:28:35 +0530 Subject: [PATCH 10/23] First check if anchor exists --- pydocx/export/html.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pydocx/export/html.py b/pydocx/export/html.py index 01d4a2dc..070ae1f3 100644 --- a/pydocx/export/html.py +++ b/pydocx/export/html.py @@ -512,10 +512,10 @@ def get_hyperlink_tag(self, target_uri): def export_hyperlink(self, hyperlink): results = super(PyDocXHTMLExporter, self).export_hyperlink(hyperlink) - if hyperlink.target_uri: - tag = self.get_hyperlink_tag(target_uri=hyperlink.target_uri) - else: + if not hyperlink.target_uri and hyperlink.anchor: tag = self.get_hyperlink_tag(target_uri='#' + hyperlink.anchor) + else: + tag = self.get_hyperlink_tag(target_uri=hyperlink.target_uri) if tag: results = tag.apply(results, allow_empty=False) From 7e8ce845bf30a34c1d3babe2068273b98b3eb84a Mon Sep 17 00:00:00 2001 From: Tarashish Mishra Date: Fri, 26 Aug 2016 12:32:29 +0530 Subject: [PATCH 11/23] minor refactoring --- pydocx/export/html.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pydocx/export/html.py b/pydocx/export/html.py index 070ae1f3..fd8ebce1 100644 --- a/pydocx/export/html.py +++ b/pydocx/export/html.py @@ -271,10 +271,7 @@ def get_heading_tag(self, paragraph): self.default_heading_level, ) if paragraph.bookmark_name: - attrs = { - 'id': paragraph.bookmark_name - } - return HtmlTag(tag, **attrs) + return HtmlTag(tag, id=paragraph.bookmark_name) return HtmlTag(tag) def export_paragraph(self, paragraph): From ae32fb3509e663d985a2a13019bc2396b4ee439d Mon Sep 17 00:00:00 2001 From: Tarashish Mishra Date: Fri, 26 Aug 2016 12:48:14 +0530 Subject: [PATCH 12/23] Add tests for internal links --- tests/export/html/test_heading.py | 27 +++++++++++++++++++++++++++ tests/export/html/test_hyperlink.py | 18 ++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/tests/export/html/test_heading.py b/tests/export/html/test_heading.py index efd7ab1c..b77dfc2d 100644 --- a/tests/export/html/test_heading.py +++ b/tests/export/html/test_heading.py @@ -744,3 +744,30 @@ def test_single_lvl_list_has_precedence_over_headings(self): ''' self.assert_document_generates_html(document, expected_html) + + def test_heading_with_bookmark(self): + document_xml = ''' +

+ + + + + + + aaa + +

+ ''' + + style_xml = ''' + + ''' + + document = WordprocessingDocumentFactory() + document.add(StyleDefinitionsPart, style_xml) + document.add(MainDocumentPart, document_xml) + + expected_html = '

aaa

' + self.assert_document_generates_html(document, expected_html) diff --git a/tests/export/html/test_hyperlink.py b/tests/export/html/test_hyperlink.py index a88ab748..dbbe4a0c 100644 --- a/tests/export/html/test_hyperlink.py +++ b/tests/export/html/test_hyperlink.py @@ -194,3 +194,21 @@ def test_with_anchor(self): expected_html = '

link.

' self.assert_document_generates_html(document, expected_html) + + def test_internal_link(self): + document_xml = ''' +

+ + + link + + +

+ ''' + + document = WordprocessingDocumentFactory() + + document.add(MainDocumentPart, document_xml) + + expected_html = '

link

' + self.assert_document_generates_html(document, expected_html) From fdac283c14aa21fc2e3b350d150bb075da579e7a Mon Sep 17 00:00:00 2001 From: Wes Winham Date: Fri, 26 Aug 2016 15:12:54 -0400 Subject: [PATCH 13/23] Included #222 in the changelog --- CHANGELOG.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index ae66ccc9..120f60d2 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,3 +1,7 @@ +**dev** + +- Internal links and anchors are now retained. Thanks, sunu! `#222 `_ + **0.9.10** - No longer error when processing margin positions with decimal points. From 9cd76eeb1f99cb3e580a8138a00295087f86eae0 Mon Sep 17 00:00:00 2001 From: Wes Winham Date: Fri, 26 Aug 2016 15:13:44 -0400 Subject: [PATCH 14/23] Added sunu to authors for #222 --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index d1b33f05..c4b46c16 100644 --- a/AUTHORS +++ b/AUTHORS @@ -3,3 +3,4 @@ Jason Ward Kyle Gibson Chirica Gheorghe Anirudha Bose +Tarashish Mishra From 5a4d93a2d8f11553731119781fe1985b322f9e95 Mon Sep 17 00:00:00 2001 From: Chirica Gheorghe Date: Wed, 1 Feb 2017 18:06:19 +0200 Subject: [PATCH 15/23] Fixed item listing, moved margin-left to

--- pydocx/export/html.py | 152 +++++++++++++----- pydocx/export/numbering_span.py | 136 +++++++++++++++- pydocx/openxml/wordprocessing/paragraph.py | 4 + .../wordprocessing/paragraph_properties.py | 8 + 4 files changed, 257 insertions(+), 43 deletions(-) diff --git a/pydocx/export/html.py b/pydocx/export/html.py index fd8ebce1..44a87b77 100644 --- a/pydocx/export/html.py +++ b/pydocx/export/html.py @@ -96,12 +96,12 @@ class HtmlTag(object): closed_tag_format = '' def __init__( - self, - tag, - allow_self_closing=False, - closed=False, - allow_whitespace=False, - **attrs + self, + tag, + allow_self_closing=False, + closed=False, + allow_whitespace=False, + **attrs ): self.tag = tag self.allow_self_closing = allow_self_closing @@ -315,42 +315,37 @@ def export_paragraph_property_indentation(self, paragraph, results): style = {} - if properties.indentation_right: - # TODO would be nice if this integer conversion was handled - # implicitly by the model somehow - try: - right = int(properties.indentation_right) - except ValueError: - right = None + # for numbering properties we add style to span item level + if paragraph.properties.numbering_properties is None: + indentation_left = properties.to_int('indentation_left') + indentation_first_line = properties.to_int('indentation_first_line') + else: + indentation_left = None + indentation_first_line = None - if right: - right = convert_twips_to_ems(right) - style['margin-right'] = '{0:.2f}em'.format(right) + listing_style = self.export_listing_paragraph_property_indentation( + properties, + paragraph.get_numbering_level().paragraph_properties, + include_text_indent=True + ) + if 'text-indent' in listing_style and listing_style['text-indent'] != '0.00em': + style['text-indent'] = listing_style['text-indent'] + style['display'] = 'inline-block' - if properties.indentation_left: - # TODO would be nice if this integer conversion was handled - # implicitly by the model somehow - try: - left = int(properties.indentation_left) - except ValueError: - left = None + indentation_right = properties.to_int('indentation_right') - if left: - left = convert_twips_to_ems(left) - style['margin-left'] = '{0:.2f}em'.format(left) + if indentation_right: + right = convert_twips_to_ems(indentation_right) + style['margin-right'] = '{0:.2f}em'.format(right) - if properties.indentation_first_line: - # TODO would be nice if this integer conversion was handled - # implicitly by the model somehow - try: - first_line = int(properties.indentation_first_line) - except ValueError: - first_line = None + if indentation_left: + left = convert_twips_to_ems(indentation_left) + style['margin-left'] = '{0:.2f}em'.format(left) - if first_line: - first_line = convert_twips_to_ems(first_line) - # TODO text-indent doesn't work with inline elements like span - style['text-indent'] = '{0:.2f}em'.format(first_line) + if indentation_first_line: + first_line = convert_twips_to_ems(indentation_first_line) + # TODO text-indent doesn't work with inline elements like span + style['text-indent'] = '{0:.2f}em'.format(first_line) if style: attrs = { @@ -361,6 +356,72 @@ def export_paragraph_property_indentation(self, paragraph, results): return results + def export_listing_paragraph_property_indentation(self, paragraph_properties, level_properties, + include_text_indent=False): + style = {} + + level_indentation_left = level_properties.to_int('indentation_left') + level_indentation_hanging = level_properties.to_int('indentation_hanging') + + paragraph_indentation_left = paragraph_properties.to_int('indentation_left') + paragraph_indentation_hanging = paragraph_properties.to_int('indentation_hanging') + paragraph_indentation_first_line = paragraph_properties.to_int('indentation_first_line') + + left = 0 + hanging = 0 + + if paragraph_indentation_left is None and paragraph_indentation_hanging is None: + left = 0 + hanging = 0 + elif paragraph_indentation_left is None and paragraph_indentation_hanging is not None: + left = level_indentation_left + + hanging = paragraph_indentation_hanging + hanging -= level_indentation_hanging + + left -= level_indentation_hanging + left -= paragraph_indentation_hanging + + elif paragraph_indentation_left is not None and paragraph_indentation_hanging is None: + left = paragraph_indentation_left - level_indentation_hanging + hanging = 0 + + elif paragraph_indentation_left is not None and paragraph_indentation_hanging is not None: + left = paragraph_indentation_left + hanging = paragraph_indentation_hanging + + left -= hanging + + if paragraph_indentation_left > level_indentation_left: + # this mean that 'left' include also the listing indentation + # we remove the default listing indentations because html ul/ol/li does add it's own + left -= (level_indentation_left - level_indentation_hanging) + else: + left -= level_indentation_hanging + + hanging -= level_indentation_hanging + + # first line is added as left margin + if paragraph_indentation_first_line is not None: + left += paragraph_indentation_first_line + + if left: + left = convert_twips_to_ems(left) + style['margin-left'] = '{0:.2f}em'.format(left) + + # we don't allow negative hanging + if hanging < 0: + hanging = 0 + + if include_text_indent: + if hanging is not None: + # Now, here we add the hanging as text-indent for the paragraph + hanging = convert_twips_to_ems(hanging) + # TODO text-indent doesn't work with inline elements like span + style['text-indent'] = '{0:.2f}em'.format(hanging) + + return style + def get_run_styles_to_apply(self, run): parent_paragraph = run.get_first_ancestor(wordprocessing.Paragraph) if parent_paragraph and parent_paragraph.heading_style: @@ -737,7 +798,22 @@ def export_numbering_item(self, numbering_item): numbering_item.children, self.export_node, ) - tag = HtmlTag('li') + + style = None + + if numbering_item.children: + level_properties = numbering_item.numbering_span.numbering_level.paragraph_properties + paragraph_properties = numbering_item.children[0].properties + + style = self.export_listing_paragraph_property_indentation(paragraph_properties, + level_properties) + + attrs = {} + + if style: + attrs['style'] = convert_dictionary_to_style_fragment(style) + + tag = HtmlTag('li', **attrs) return tag.apply(results) def export_field_hyperlink(self, simple_field, field_args): diff --git a/pydocx/export/numbering_span.py b/pydocx/export/numbering_span.py index f84dbe3e..46b260c4 100644 --- a/pydocx/export/numbering_span.py +++ b/pydocx/export/numbering_span.py @@ -10,11 +10,10 @@ import string from pydocx.openxml import wordprocessing -from pydocx.util.memoize import memoized - from pydocx.openxml.wordprocessing.run import Run from pydocx.openxml.wordprocessing.tab_char import TabChar from pydocx.openxml.wordprocessing.text import Text +from pydocx.util.memoize import memoized # Defined in 17.15.1.25 DEFAULT_AUTOMATIC_TAB_STOP_INTERVAL = 720 # twips @@ -198,6 +197,10 @@ def __init__(self, components=None): self.current_item = None self.current_item_index = 0 self.candidate_numbering_items = [] + self.child_parent_num_map = {} + self.parent_child_num_map = {} + + self.detect_parent_child_map_for_items() @memoized def get_numbering_level(self, paragraph): @@ -206,6 +209,101 @@ def get_numbering_level(self, paragraph): return None return level + def detect_parent_child_map_for_items(self): + """ + There are cases when we have span inside an item and this span is different from the parent one. + Example listing: + 1. A + 2. B + Separate + * B1 + * B2 + 3. C + + In the above example B1, B2 items are creating a separate span and does have different num. definition. + We need to somehow detect this cases and make sure we properly continue numbering(in this case '3. C'). + + We parse this as following: + let say that list: A, B, C has abstract_num_id = 1 + and list: B1, B2 has abstract_num_id = 4 + + As output we will construct 2 dicts as follow: + child_parent_num_map = { + "4": "1" + } + + parent_child_num_map = { + "1": "4" + } + + So, when we process paragraph item we know from the start that it has a parent or not. + """ + + if not self.components: + return + + parent_child_map = {} + child_parent_map = {} + # we are interested only in components that are part of the listing + components = [component for component in self.components if component.properties + and component.properties.numbering_properties] + if not components: + return + + components_reversed = list(reversed(components)) + + for i, component in enumerate(components): + parent_num_id = component.numbering_definition.abstract_num_id + nums = [] + outer_item_found = False + for j, next_component in enumerate(components_reversed[:-1]): + next_num_id = next_component.numbering_definition.abstract_num_id + if parent_num_id == next_num_id and parent_num_id not in parent_child_map: + outer_item_found = True + break + + if outer_item_found: + for _component in components[i + 1:-j - 1]: + child_num_id = _component.numbering_definition.abstract_num_id + if child_num_id != parent_num_id: + nums.append(child_num_id) + if nums: + parent_child_map[parent_num_id] = nums + + # save also the child parent map so that we can easily check if child has parents + for parent, children in parent_child_map.items(): + for child in children: + child_parent_map[child] = parent + + self.child_parent_num_map = child_parent_map + self.parent_child_num_map = parent_child_map + + def inside_parent_span(self, paragraph): + numbering_properties = paragraph.properties.numbering_properties + if not numbering_properties: + return False + + paragraph_num_id = paragraph.numbering_definition.abstract_num_id + + if not self.current_span: + return False + + return bool(self.child_parent_num_map.get(paragraph_num_id, None)) + + def is_parent_of_current_span(self, paragraph): + numbering_properties = paragraph.properties.numbering_properties + if not numbering_properties: + return False + + paragraph_num_id = paragraph.numbering_definition.abstract_num_id + + if not self.current_span: + return True + + current_span_num_id = self.current_span.numbering_definition.abstract_num_id + + return current_span_num_id in self.parent_child_num_map.get(paragraph_num_id, []) + def include_candidate_items_in_current_item(self, new_item_index): ''' A generator to determine which of the candidate numbering items need to @@ -235,12 +333,19 @@ def should_start_new_span(self, paragraph): span, start a new span. Otherwise, do not start a new span. ''' + if self.current_span is None: return True level = self.get_numbering_level(paragraph) num_def = None if level: num_def = level.parent + + if self.inside_parent_span(paragraph): + return False + elif self.is_parent_of_current_span(paragraph): + return False + return num_def != self.current_span.numbering_definition def should_start_new_item(self, paragraph): @@ -256,6 +361,10 @@ def should_start_new_item(self, paragraph): num_def = None if level: num_def = level.parent + + if self.inside_parent_span(paragraph) or self.is_parent_of_current_span(paragraph): + return True + return num_def == self.current_span.numbering_definition def handle_start_new_span(self, index, paragraph): @@ -306,8 +415,9 @@ def handle_start_new_item(self, index, paragraph): else: level_id = int(level.level_id) current_level_id = int(self.current_span.numbering_level.level_id) - if level_id > current_level_id: + if level_id > current_level_id or self.inside_parent_span(paragraph): # Add a new span + item to hold this new level + next_numbering_span = NumberingSpan( numbering_level=level, numbering_definition=num_def, @@ -322,10 +432,14 @@ def handle_start_new_item(self, index, paragraph): self.current_span = next_numbering_span self.current_item = next_numbering_item self.current_item_index = index - elif level_id < current_level_id: + elif level_id < current_level_id or self.is_parent_of_current_span(paragraph): # we need to "subtract" a level. To do that, find the level # that we're going back to, which may not even exist - previous_span = self.find_previous_numbering_span_with_lower_level(level_id) + if self.is_parent_of_current_span(paragraph): + previous_span = self.find_previous_numbering_span_by_num_def(paragraph) + else: + previous_span = self.find_previous_numbering_span_with_lower_level(level_id) + if self.numbering_span_stack: assert previous_span self.current_span = previous_span @@ -358,6 +472,16 @@ def find_previous_numbering_span_with_lower_level(self, level_id): self.numbering_span_stack.pop() return previous_span + def find_previous_numbering_span_by_num_def(self, paragraph): + previous_span = None + while self.numbering_span_stack: + previous_span = self.numbering_span_stack[-1] + if previous_span.numbering_definition == paragraph.numbering_definition: + # we found the parent span of the paragraph item + break + self.numbering_span_stack.pop() + return previous_span + def handle_paragraph(self, index, paragraph): level = self.get_numbering_level(paragraph) num_def = None @@ -549,8 +673,10 @@ def detect_new_faked_level_started(self, paragraph, current_level_id=None): def get_left_position_for_numbering_span(self, numbering_span): paragraph = numbering_span.get_first_child_of_first_item() + left_pos = self.get_left_position_for_paragraph(paragraph) num_level_para_properties = numbering_span.numbering_level.paragraph_properties + if num_level_para_properties: left_pos += num_level_para_properties.start_margin_position return left_pos diff --git a/pydocx/openxml/wordprocessing/paragraph.py b/pydocx/openxml/wordprocessing/paragraph.py index fe5443e3..a6a74abd 100644 --- a/pydocx/openxml/wordprocessing/paragraph.py +++ b/pydocx/openxml/wordprocessing/paragraph.py @@ -47,6 +47,10 @@ def effective_properties(self): self._effective_properties = properties return self._effective_properties + @property + def numbering_definition(self): + return self.get_numbering_definition() + def has_structured_document_parent(self): from pydocx.openxml.wordprocessing import SdtBlock return self.has_ancestor(SdtBlock) diff --git a/pydocx/openxml/wordprocessing/paragraph_properties.py b/pydocx/openxml/wordprocessing/paragraph_properties.py index 52f3ec25..94d47c6a 100644 --- a/pydocx/openxml/wordprocessing/paragraph_properties.py +++ b/pydocx/openxml/wordprocessing/paragraph_properties.py @@ -43,3 +43,11 @@ def start_margin_position(self): if start_margin: return start_margin return 0 + + def to_int(self, attribute): + # TODO would be nice if this integer conversion was handled + # implicitly by the model somehow + try: + return int(getattr(self, attribute, None)) + except (ValueError, TypeError): + return None From 407a516e7b60dc4d359a4c0cee93f48ebc843e8f Mon Sep 17 00:00:00 2001 From: Chirica Gheorghe Date: Wed, 1 Feb 2017 20:11:20 +0200 Subject: [PATCH 16/23] Fixed no 'level_indentation' for fake lists --- pydocx/export/html.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pydocx/export/html.py b/pydocx/export/html.py index 44a87b77..9b2715b9 100644 --- a/pydocx/export/html.py +++ b/pydocx/export/html.py @@ -360,8 +360,12 @@ def export_listing_paragraph_property_indentation(self, paragraph_properties, le include_text_indent=False): style = {} - level_indentation_left = level_properties.to_int('indentation_left') - level_indentation_hanging = level_properties.to_int('indentation_hanging') + if level_properties: + level_indentation_left = level_properties.to_int('indentation_left') + level_indentation_hanging = level_properties.to_int('indentation_hanging') + else: + level_indentation_left = 0 + level_indentation_hanging = 0 paragraph_indentation_left = paragraph_properties.to_int('indentation_left') paragraph_indentation_hanging = paragraph_properties.to_int('indentation_hanging') @@ -371,7 +375,7 @@ def export_listing_paragraph_property_indentation(self, paragraph_properties, le hanging = 0 if paragraph_indentation_left is None and paragraph_indentation_hanging is None: - left = 0 + left = level_indentation_left - level_indentation_hanging hanging = 0 elif paragraph_indentation_left is None and paragraph_indentation_hanging is not None: left = level_indentation_left @@ -803,6 +807,8 @@ def export_numbering_item(self, numbering_item): if numbering_item.children: level_properties = numbering_item.numbering_span.numbering_level.paragraph_properties + # get the first paragraph properties with will contain information on how to properly + # indent listing item paragraph_properties = numbering_item.children[0].properties style = self.export_listing_paragraph_property_indentation(paragraph_properties, From 1bd667ee724d6592cb8681d8b7f2da0083b0ab81 Mon Sep 17 00:00:00 2001 From: Chirica Gheorghe Date: Fri, 3 Feb 2017 11:02:07 +0200 Subject: [PATCH 17/23] Added test cases for the added features --- pydocx/export/base.py | 1 + pydocx/export/html.py | 144 +++++++++++------- pydocx/export/numbering_span.py | 24 +-- pydocx/openxml/wordprocessing/abstract_num.py | 17 +++ pydocx/openxml/wordprocessing/paragraph.py | 33 +++- .../wordprocessing/paragraph_properties.py | 6 +- .../test_faked_superscript_and_subscript.py | 3 +- tests/export/test_docx.py | 2 + tests/export/test_xml.py | 26 ++-- tests/fixtures/lists_with_margins.docx | Bin 0 -> 13237 bytes tests/fixtures/lists_with_margins.html | 19 +++ .../nested_lists_different_num_ids.docx | Bin 0 -> 29989 bytes .../nested_lists_different_num_ids.html | 19 +++ 13 files changed, 212 insertions(+), 82 deletions(-) create mode 100644 tests/fixtures/lists_with_margins.docx create mode 100644 tests/fixtures/lists_with_margins.html create mode 100644 tests/fixtures/nested_lists_different_num_ids.docx create mode 100644 tests/fixtures/nested_lists_different_num_ids.html diff --git a/pydocx/export/base.py b/pydocx/export/base.py index 9ca2afef..2360a702 100644 --- a/pydocx/export/base.py +++ b/pydocx/export/base.py @@ -32,6 +32,7 @@ def __init__(self, path): self.captured_runs = None self.complex_field_runs = [] + self.numbering_level_listing_track = {} self.node_type_to_export_func_map = { wordprocessing.Document: self.export_document, diff --git a/pydocx/export/html.py b/pydocx/export/html.py index 9b2715b9..39a69c41 100644 --- a/pydocx/export/html.py +++ b/pydocx/export/html.py @@ -17,7 +17,7 @@ POINTS_PER_EM, PYDOCX_STYLES, TWIPS_PER_POINT, - EMUS_PER_PIXEL, + EMUS_PER_PIXEL ) from pydocx.export.base import PyDocXExporter from pydocx.export.numbering_span import NumberingItem @@ -323,14 +323,17 @@ def export_paragraph_property_indentation(self, paragraph, results): indentation_left = None indentation_first_line = None - listing_style = self.export_listing_paragraph_property_indentation( - properties, - paragraph.get_numbering_level().paragraph_properties, - include_text_indent=True - ) - if 'text-indent' in listing_style and listing_style['text-indent'] != '0.00em': - style['text-indent'] = listing_style['text-indent'] - style['display'] = 'inline-block' + paragraph_num_level = paragraph.get_numbering_level() + + if paragraph_num_level: + listing_style = self.export_listing_paragraph_property_indentation( + paragraph, + paragraph_num_level.paragraph_properties, + include_text_indent=True + ) + if 'text-indent' in listing_style and listing_style['text-indent'] != '0.00em': + style['text-indent'] = listing_style['text-indent'] + style['display'] = 'inline-block' indentation_right = properties.to_int('indentation_right') @@ -356,58 +359,96 @@ def export_paragraph_property_indentation(self, paragraph, results): return results - def export_listing_paragraph_property_indentation(self, paragraph_properties, level_properties, + def get_previous_level_paragraph(self, num_id, level_id): + level_id = int(level_id) + + while True: + if level_id == 0: + prev_level_id = level_id + else: + prev_level_id = level_id - 1 + + prev_level_paragraphs = self.numbering_level_listing_track[num_id][prev_level_id] + if prev_level_paragraphs: + return prev_level_paragraphs[-1] + + if prev_level_id == 0 and not prev_level_paragraphs: + # this is an ege case with older version of word when it may contain a sublist + # into a separate num_id. + break + + level_id -= 1 + + return None + + def export_listing_paragraph_property_indentation(self, paragraph, level_properties, include_text_indent=False): style = {} - if level_properties: - level_indentation_left = level_properties.to_int('indentation_left') - level_indentation_hanging = level_properties.to_int('indentation_hanging') - else: - level_indentation_left = 0 - level_indentation_hanging = 0 + if not level_properties or not paragraph.has_numbering_properties: + return style - paragraph_indentation_left = paragraph_properties.to_int('indentation_left') - paragraph_indentation_hanging = paragraph_properties.to_int('indentation_hanging') - paragraph_indentation_first_line = paragraph_properties.to_int('indentation_first_line') + default_level_indentation = paragraph.get_numbering_default_level_indentation() - left = 0 - hanging = 0 + paragraph_properties = paragraph.properties - if paragraph_indentation_left is None and paragraph_indentation_hanging is None: - left = level_indentation_left - level_indentation_hanging - hanging = 0 - elif paragraph_indentation_left is None and paragraph_indentation_hanging is not None: - left = level_indentation_left + level_id = int(paragraph_properties.numbering_properties.level_id) + num_id = paragraph_properties.numbering_properties.num_id - hanging = paragraph_indentation_hanging - hanging -= level_indentation_hanging + level_ind_left = level_properties.to_int('indentation_left', default=0) + level_ind_hanging = level_properties.to_int('indentation_hanging', default=0) - left -= level_indentation_hanging - left -= paragraph_indentation_hanging + paragraph_ind_left = paragraph_properties.to_int('indentation_left', default=0) + paragraph_ind_hanging = paragraph_properties.to_int('indentation_hanging', default=0) + paragraph_ind_first_line = paragraph_properties.to_int('indentation_first_line', + default=0) - elif paragraph_indentation_left is not None and paragraph_indentation_hanging is None: - left = paragraph_indentation_left - level_indentation_hanging - hanging = 0 + left = paragraph_ind_left or level_ind_left + hanging = paragraph_ind_hanging or level_ind_hanging + + # at this point we have no info about indentation, so we keep the default one + if not left and not hanging: + return style + + if num_id not in self.numbering_level_listing_track: + # by default there are only 9 numbering levels in docx(0 indexed) + self.numbering_level_listing_track[num_id] = [[] for _ in range(10)] + if paragraph not in self.numbering_level_listing_track[num_id][level_id]: + self.numbering_level_listing_track[num_id][level_id].append(paragraph) + + # by default left contains hanging as well, so we remove it + left -= hanging - elif paragraph_indentation_left is not None and paragraph_indentation_hanging is not None: - left = paragraph_indentation_left - hanging = paragraph_indentation_hanging + if level_id == 0: + # because html ul/ol/li elements have there default indentations + # we remove the default word one as well + # this way we will have as near as possible migration to html + left -= (default_level_indentation['left'] - level_ind_hanging) - left -= hanging + # first line are added left margins + if paragraph_ind_first_line: + left += paragraph_ind_first_line - if paragraph_indentation_left > level_indentation_left: - # this mean that 'left' include also the listing indentation - # we remove the default listing indentations because html ul/ol/li does add it's own - left -= (level_indentation_left - level_indentation_hanging) + if level_id > 0: + # for nested levels we need to add indentation based on parent level + prev_paragraph = self.get_previous_level_paragraph(num_id, level_id) + if prev_paragraph: + prev_left_level_indentation = prev_paragraph.get_numbering_level().\ + paragraph_properties.to_int('indentation_left') + left -= (prev_left_level_indentation - level_ind_hanging) else: - left -= level_indentation_hanging + # there are edge cases when we have a level > 0 for specific num_id but no + # actual level=0 for this num_id. in such cases we just do the default + # indentation + left -= level_ind_hanging - hanging -= level_indentation_hanging + # because lists add there own nested level indentation we subtract it here + # and the remaining part will be the actual needed indentation + left -= default_level_indentation['level_indentation_step'] - # first line is added as left margin - if paragraph_indentation_first_line is not None: - left += paragraph_indentation_first_line + # here we well, we remove the default hanging which word adds + # because

tag will provide it's own + hanging -= level_ind_hanging if left: left = convert_twips_to_ems(left) @@ -806,12 +847,13 @@ def export_numbering_item(self, numbering_item): style = None if numbering_item.children: - level_properties = numbering_item.numbering_span.numbering_level.paragraph_properties - # get the first paragraph properties with will contain information on how to properly - # indent listing item - paragraph_properties = numbering_item.children[0].properties + level_properties = numbering_item.numbering_span.\ + numbering_level.paragraph_properties + # get the first paragraph properties which will contain information + # on how to properly indent listing item + paragraph = numbering_item.children[0] - style = self.export_listing_paragraph_property_indentation(paragraph_properties, + style = self.export_listing_paragraph_property_indentation(paragraph, level_properties) attrs = {} diff --git a/pydocx/export/numbering_span.py b/pydocx/export/numbering_span.py index 46b260c4..b99930af 100644 --- a/pydocx/export/numbering_span.py +++ b/pydocx/export/numbering_span.py @@ -18,7 +18,6 @@ # Defined in 17.15.1.25 DEFAULT_AUTOMATIC_TAB_STOP_INTERVAL = 720 # twips - roman_numeral_map = tuple(zip( (1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1), ('M', 'CM', 'D', 'CD', 'C', 'XC', 'L', 'XL', 'X', 'IX', 'V', 'IV', 'I') @@ -211,7 +210,8 @@ def get_numbering_level(self, paragraph): def detect_parent_child_map_for_items(self): """ - There are cases when we have span inside an item and this span is different from the parent one. + There are cases when we have span inside an item and this span is different from + the parent one. Example listing: 1. A 2. B @@ -220,8 +220,9 @@ def detect_parent_child_map_for_items(self): * B2 3. C - In the above example B1, B2 items are creating a separate span and does have different num. definition. - We need to somehow detect this cases and make sure we properly continue numbering(in this case '3. C'). + In the above example B1, B2 items are creating a separate span and does have different + num. definition. We need to somehow detect this cases and make sure we properly + continue numbering(in this case '3. C'). We parse this as following: let say that list: A, B, C has abstract_num_id = 1 @@ -245,8 +246,10 @@ def detect_parent_child_map_for_items(self): parent_child_map = {} child_parent_map = {} # we are interested only in components that are part of the listing - components = [component for component in self.components if component.properties - and component.properties.numbering_properties] + components = [component for component in self.components if + hasattr(component, 'properties') + and hasattr(component.properties, 'numbering_properties') + and component.numbering_definition] if not components: return @@ -279,8 +282,7 @@ def detect_parent_child_map_for_items(self): self.parent_child_num_map = parent_child_map def inside_parent_span(self, paragraph): - numbering_properties = paragraph.properties.numbering_properties - if not numbering_properties: + if not paragraph.has_numbering_properties or not paragraph.has_numbering_definition: return False paragraph_num_id = paragraph.numbering_definition.abstract_num_id @@ -291,8 +293,7 @@ def inside_parent_span(self, paragraph): return bool(self.child_parent_num_map.get(paragraph_num_id, None)) def is_parent_of_current_span(self, paragraph): - numbering_properties = paragraph.properties.numbering_properties - if not numbering_properties: + if not paragraph.has_numbering_properties or not paragraph.has_numbering_definition: return False paragraph_num_id = paragraph.numbering_definition.abstract_num_id @@ -438,7 +439,8 @@ def handle_start_new_item(self, index, paragraph): if self.is_parent_of_current_span(paragraph): previous_span = self.find_previous_numbering_span_by_num_def(paragraph) else: - previous_span = self.find_previous_numbering_span_with_lower_level(level_id) + previous_span = self.find_previous_numbering_span_with_lower_level( + level_id) if self.numbering_span_stack: assert previous_span diff --git a/pydocx/openxml/wordprocessing/abstract_num.py b/pydocx/openxml/wordprocessing/abstract_num.py index 98b0727f..cdad313e 100644 --- a/pydocx/openxml/wordprocessing/abstract_num.py +++ b/pydocx/openxml/wordprocessing/abstract_num.py @@ -27,3 +27,20 @@ def __init__(self, **kwargs): def get_level(self, level_id): return self._levels.get(level_id) + + def get_indentation_between_levels(self): + """ + Depending on the word version we may get a different default indentation between + levels. For this we will only check first 2 levels as the other follow the same step. + """ + + try: + lvl0_ind = self.levels[0].paragraph_properties.to_int('indentation_left', + default=0) + lvl1_ind = self.levels[1].paragraph_properties.to_int('indentation_left', + default=0) + ind_step = lvl1_ind - lvl0_ind + except IndexError: + ind_step = 720 # default one + + return ind_step diff --git a/pydocx/openxml/wordprocessing/paragraph.py b/pydocx/openxml/wordprocessing/paragraph.py index a6a74abd..511ff379 100644 --- a/pydocx/openxml/wordprocessing/paragraph.py +++ b/pydocx/openxml/wordprocessing/paragraph.py @@ -4,7 +4,7 @@ print_function, unicode_literals, ) - +from pydocx.util.memoize import memoized from pydocx.models import XmlModel, XmlCollection, XmlChild from pydocx.openxml.wordprocessing.hyperlink import Hyperlink from pydocx.openxml.wordprocessing.paragraph_properties import ParagraphProperties # noqa @@ -89,9 +89,8 @@ def heading_style(self): def heading_style(self, style): self._heading_style = style + @memoized def get_numbering_definition(self): - # TODO add memoization - # TODO the getattr is necessary because of footnotes. From the context # of a footnote, a paragraph's container is the footnote part, which # doesn't have access to the numbering_definitions_part @@ -107,8 +106,8 @@ def get_numbering_definition(self): num_id=numbering_properties.num_id, ) + @memoized def get_numbering_level(self): - # TODO add memoization numbering_definition = self.get_numbering_definition() if not numbering_definition: return @@ -180,3 +179,29 @@ def get_number_of_initial_tabs(self): else: break return tab_count + + @property + @memoized + def has_numbering_properties(self): + return bool(getattr(self.properties, 'numbering_properties', None)) + + @property + @memoized + def has_numbering_definition(self): + return bool(self.numbering_definition) + + @memoized + def get_numbering_default_level_indentation(self, first_level_left=720): + """Given an input listing paragraph we calculate what is the default left + indentation on this level. Based on this we can determine whether we should + add margins to html

or leave the default added by tag.""" + + # by default a list is started with 'first_level_left' indentation. + + level_id = int(self.properties.numbering_properties.level_id) + + default_left_inc = self.numbering_definition.get_indentation_between_levels() + + left = first_level_left * (1 if not level_id else level_id) + + return {'left': left, 'level_indentation_step': default_left_inc} diff --git a/pydocx/openxml/wordprocessing/paragraph_properties.py b/pydocx/openxml/wordprocessing/paragraph_properties.py index 94d47c6a..c6bbc374 100644 --- a/pydocx/openxml/wordprocessing/paragraph_properties.py +++ b/pydocx/openxml/wordprocessing/paragraph_properties.py @@ -44,10 +44,10 @@ def start_margin_position(self): return start_margin return 0 - def to_int(self, attribute): + def to_int(self, attribute, default=None): # TODO would be nice if this integer conversion was handled # implicitly by the model somehow try: - return int(getattr(self, attribute, None)) + return int(getattr(self, attribute, default)) except (ValueError, TypeError): - return None + return default diff --git a/tests/export/mixins/test_faked_superscript_and_subscript.py b/tests/export/mixins/test_faked_superscript_and_subscript.py index a0e6cce9..53fffa3f 100644 --- a/tests/export/mixins/test_faked_superscript_and_subscript.py +++ b/tests/export/mixins/test_faked_superscript_and_subscript.py @@ -8,12 +8,12 @@ from pydocx.export.html import PyDocXHTMLExporter from pydocx.export.mixins import FakedSuperscriptAndSubscriptExportMixin +from pydocx.openxml.packaging import MainDocumentPart, StyleDefinitionsPart from pydocx.test import DocumentGeneratorTestCase, DocXFixtureTestCaseFactory from pydocx.test.utils import ( PyDocXHTMLExporterNoStyle, WordprocessingDocumentFactory, ) -from pydocx.openxml.packaging import MainDocumentPart, StyleDefinitionsPart class FakedSuperscriptAndSubscriptHTMLExporterNoStyle( @@ -342,4 +342,5 @@ class DocXFixtureTestCase(DocXFixtureTestCaseFactory): 'fake_superscript', ) + DocXFixtureTestCase.generate() diff --git a/tests/export/test_docx.py b/tests/export/test_docx.py index 8ebe3f47..15f5d077 100644 --- a/tests/export/test_docx.py +++ b/tests/export/test_docx.py @@ -33,10 +33,12 @@ class ConvertDocxToHtmlTestCase(DocXFixtureTestCaseFactory): 'inline_tags', 'justification', 'list_in_table', + 'lists_with_margins', 'lists_with_styles', 'missing_numbering', 'missing_style', 'nested_lists', + 'nested_lists_different_num_ids', 'nested_table_rowspan', 'nested_tables', 'no_break_hyphen', diff --git a/tests/export/test_xml.py b/tests/export/test_xml.py index 7d81f3fd..2ad17a88 100644 --- a/tests/export/test_xml.py +++ b/tests/export/test_xml.py @@ -461,27 +461,29 @@ def get_xml(self): return xml -class SeperateListsTestCase(TranslationTestCase): +class SeperateListsIntoParentListTestCase(TranslationTestCase): expected_output = '''

CCC
+ AAA +
1. BBB
2. CCC
+
DDD

''' def get_xml(self): tags = [ DXB.li(text='AAA', ilvl=0, numId=2), - # Because AAA and CCC are part of the same list (same list id) - # and BBB is different, these need to be split into three - # lists (or lose everything from BBB and after. + # Because AAA and DDD are part of the same list (same list id) + # and BBB,CCC are different, these need to be properly formatted + # into a single list where BBB,CCC are added as nested list to AAA item DXB.li(text='BBB', ilvl=0, numId=1), - DXB.li(text='CCC', ilvl=0, numId=2), + DXB.li(text='CCC', ilvl=0, numId=1), + DXB.li(text='DDD', ilvl=0, numId=2), ] body = b'' for el in tags: diff --git a/tests/fixtures/lists_with_margins.docx b/tests/fixtures/lists_with_margins.docx new file mode 100644 index 0000000000000000000000000000000000000000..a0db187ceaca86633ef5b6859c8d58bc61ae6fb6 GIT binary patch literal 13237 zcmeHOgpC-Z|EhcDRCQHN)sd3~14joy0-yl^05L%S3bw-l1OUi~007Vc(4ZPZ)>aNiRt`E! zt~N&YTJ$cK7H@LFK`FBVpuq9}J^mj*fx7rnvtCAIk-NYb|LKM$i5ANIzTw;`BfL)! zAfYTW!Z(7cZ7&_W9IW}^cvnlr+92i_tG?CUorY7=H)3>OpE(H%_)6<;^>ADi8-g632wFULLd{narmOJT=5&s) z<%wS<+HNy0ku$GVs0T_*dJeXxIQR!W?eb+rR45I@$3&7)tEsoH9VK-EDmtx!hh%xe z&Q{XXDrHj@NxkLY@JXl`kY?2H1WDI$#F8jJc|7id#DFgXI5_S|EOcL7Sws2u+uva9 zP4umG?Y&b!(T2`r1&rIfunle$(8O{UPJb3QC(C5KJ%80Hwa4Y)7hZp!uMaWwy!DV{ zV1PIlW7}j{zC;3w)$1!5K<;n4*`u3!eFp520jeGxP&ak#j4bRK=zr4x>g4}pMf~N~ z%VNhRzy#oZ&i&3Aziwn$LZBR3Fen^K=kf9<51%2PJw=Pxd%8o(S1U*&5mLl?+@scL zp7!QxhlI&aceR-c$kz?ub6~q{RG2tthw3_TCk0;5-b)m`rce55z^m9 z8GBw&@0910FP@_Re1rxS|L|47q>h0jTD$JFd_N2RFf!R}7ye>kT)+}3gUkD|by5GPBQHuo?Qe19fFZYSkf4l%4i({!*TFCZ6 z;jy@SY`wvw7S9=We%?~c#%9QVi>s3USRzE{sSm}nomVX*37c@~YRCY+OohaVvMj!RlNeSx+-h`;^q3e&*?33KoBP>4UYYkBi2+9-{EQgq z5QJ*Y3nUGef%Xa{j@k;e%{zFWk*=oK<3alR=gm@H^jLrx$hkgH$B@=8NMDe#i-;4&>G|+?$$N zOcUaJ75u3~(@%}xDtehd48v8Wity4)XNVdnyan0pLLF?x*+ngf(*>7g1I$AiYiNA5 z=__}2vsMS$e}gHOyXs)*D)2RMo3xs`l)SUPI%piK>>YmL(Ot|QM{k#XhOJ3zEW!%r zii_Z&G-%@uZIsk}fTS8Lji(NB@N$m6p=Z7XgRc;I%n;owc$KBvhZ+5kb+$4Gtmdgf%s5l2 zT(Ien8`tvc-^-wi41f0zW6hd;#J|{}P1OnCJWXNsz^ld#6UoY8g&Pn*d?3;+OG`_G z(oadl>|{o&P7qZ9Lu7?&12HTLNL#Li$a>@PvDVzcWXz`Q?a5K*Q-au2%E|ZUvW%i1 zVMZWl$`BCCU9>ME9|OD1OXa~&@;&^FS?wDh!0277n=-MK-1er3LaV_83c zPB?kk*T9ub-Fslvv@dsc#a?t+k(ZH^Fs#MKn{Z?wYG}bxQqw2j<6aHtL5SvET)axGpKac zIl}&Vao8fhJl#GZ>s4tWt*iZ3uojbZdMAYXq(;km(v^ohp#4+W!S~l0y-^3zL{ulY z87JvJl6@QnBAe!GS3EwU$>dF9j~j8<;^ikG^tv(pf^c(piNWs9k$kV;*tk(~Mo^+} zA^7|}8PN2-lq#=P*@V2pCgh19jpO&^hNvY^VEjMMSj!wsZ(1+f2g4zuGbULPO&D{C zZfQ1ukEA&;AbG{`D-BAnV&215!^BO~WSPU#xe8{qff3(~+a>*ejIRZUggg}Z?a;mB z<@$KA@x=Ts$?X!Z6WViQjRp=WmbB&I%+U%XbWb*q>f<7S#L0GdqKU_<^4Sy+Zt#9K zHte)^Hrx+m0gxc_`CQ&)&P>&%*V`P54|;yE$6QVg-svJvye=dMggR^+kr)3q=g-&t|9U%pk! z(=kweS_Sm5xLo(1@OgPZ?+;qPKHVgM@#@-`^-S7Ea=7?ZbQz?M4!?)wSG@JYJizHI z=+72=Pn%t1n?1wrn#zxeZv{h5k`yju z+z5mNv8OOdMniK271<|oZ7{6Hu%S0BP!y)brUlDzb^qzD1OGOo5^ze4Keke@nA|zOgEs2j7ep ze5}T}n8-i35hcmwFUD1AZR!>o<_!CkSofJoW<}N(#bmC$84AUb{X4lGukKQFR)dHK zk(EZv+Y!{KOPV`q`p^$zR+v!V<7~W9&tBVY&Q~WtC!ItHo|t<$sx4YwwX@&hkbjk; zh2h**Uu{m;W{Qbu$&FY*g1wf){Mf736fd9!86H|$H8jD*k<`ob9hyp~S>}DRBE2ls z`nT`poI@mh`QpnfpOhvGW&okmt=w8APOg|CHTCPu+yUk5C+a0ci8P=ix1M6y{>!B5 zh^QGQmjtBW#}IpR4|J!M&+clbbV|#Ho}scI)ZW5Nh}yLa_@O2g&UB2Nbu+g!B;VGh zMIPOtFZLR)km%8}rv+5QGbd-C?t>k)G>R9C$Oh^si55N*+m#C&O5%y>Ep{cW*_X?; zglxB+kAH2K!@O?(KA2!77>nu2fLY4_ie;3-OfMR3LMG*$a6MVg509KaT5nb0i;ia% zVfTkG5=vX7rdnTBUo5S&+N83*2J6mlz!}Rez~(BlRJSyP zYD32EEl=9-VBVQ3?7l5gh7xwHoU6}BLUVY~tayrPtDq0wN~LZ|oP?IYskJKxZ>d*E zG6%tgziY?x;(*RI#-4w&;;4Wq@2Yg{6)0tc?o~0sXM38ix>tY{F?4-PJD53ifK#K{ z?mm6Wb*^)Doa#wa;F@vlZli^Fa>o#{|8mCw7#(W+#7@z!V!c#xqwX(GhmFu$uRTY& zAbMKFoEn~6wVKxUy(=FQ%3lS&Zg5?_6-A^z`;_KkUAr8OCCxvZ`7qMIdsMk>Z>;p3 zx-E6_lJ6f)$o>7DJ~Yr{64h1tx=GSTAG;yvki-+j{@ z><@yXs-k2u(7oGB(uN4R@LilO!;Cdg zMo6w#d1cxq4813)C|49a(~>+;KGwBU!YkGovPSu$L@NNTx*k^=^E~nJ4W4}u`l%m3 zq%Vikec0j08c+zKqTB<%JOrD0(qcMd&T54aS@J1*tH@YBgwyH5 zXWGF0fi|7rbdN65AAxHPRx*Fcxnf zbX3ITmGD88EfQ7Aavo_{eC2*z>mm*D%p$XcwdoKJhziUg7FdyD5!u~(kA9(5uPRRC z#@$G*Us4tNhDK?zJbFZ$2@!kULR#MdKArkUZi`T}I2XO6=8D~!Pt_@58`qaXg{X`l zT5jKJ2AQIac-X!#RSk*~;;C7<`|ublar+Lmn_ku%N}Y+{$w{b@dN#o(M=S8b<)+TmrmJtu3 zaNCj79ZLr5h%`ldBQmJ=`-euPm%{T~ihop_yDMWSa9{*{0Bq=gdS1>(`ij43PW*JY zL?0tE zDR=-t)D-~u`I_#J@X+4D)xzkHt)?wQ(tec#b?{uHe$zL9_N@Do*(nxxGJcm7Z*eKo zYPHh)b(KMuXylk|&nOSzdQTiSNfB0} z?zObZunRG%dM`@vTH!Qd6jLqnOiOsntiru8+OBFWsS=VRCUR5b?O3B{ofQuCvCUWgu!O;Y*1G*uXu?Oxf@n`BqkW8%1u`et&c zqKfa7;d&D*<)hK}@4gAu$af`%5W@jGp6x@D@8sP%XiCEr7@ZGHTBXl*4#BwSc5Cd| zywO9Fznutkoa+QLA91jMC1iO#+bU_@-eqrk3or8)mKVVU?#Y%`qIV)FIkE z%l%|@O0#PhsSnr9F$@`BC;fyoFAm6&_1^ikgzbiivJ9eGe2SzGMu^(e)cV1P@$4*sXka~Kv$v^r))m{? z`4(~aOlHBwig`Y?LMeG~{*W?FQf;qhN4`I6a8|WK)fh&och;YfK@3;i$CN3JAo#0(H!%J5U?=LMlc-q;+6_XpzZunrGh_104k z-X#PH6P>W^372W$VlkIpnuGb{bh0oeU()HKAPrIbioQp8Et#ivff?<&quBk>b$0YUB8l z9q-6jvX(;ZSryU}*LK?&$$7Jw#a%83e^Crg8bW!Oau;EchMfqcYCKly1+=ET#g=k2K)A)Qx<{Sbd%QD<^y%WF>lF(RM_?BvLLXXnIT6LMwvBrwMAED)HyS-*y zG%+fZBrs+vBR^uu5xhOst_0~u2|vVj46!80a_792DHUYv*bjP&i7}KL@}5Y9Q-Q5j zS>cB#I$j-}pGMG#w3^Zy5tjL~de9yo!jhXN;L+aM9C@~zSSm?PovCN?{Tm_8x~Yefh5mlBGFK@pc`7NHj#kn4TS=o}-ZLB(Cs>Q3Vl zrj`*-ud!H`;)K?h-0P=E@P;-P3c9maf%eo6rY<@Y*PSr!t;u};Jh-d4ULGzs0qwl}`H(<3j!0rV0VSZ^eJ|X`?TM%Jip0#mLzR~$< zXY$IBI?Od=#!(lJ^rRb8Y$1@TytXaRqIw&JZRw(DRq~K2=!1>K#KntpaDWs%*cXMK zt|CK5Mrs83a3!Bh=R=OF%f=%hxjSZI%%$=H`*OuRvz>KF9rg3x_<{s=Vhrr6z8SMMTD8!+*=8!Z%@uYlVK-?+h2eK<9 z-nXTx#_|hcTj7+Fic}#J<>gTL2FJ-fR-K%UJWz6w?E7QQ&Ui7?s-MyHoj*h;r3 z*@`e)>zi0L<_tzBow!o!qo^>XP5+Qjpvmh;`VZAUFHgF22g=IVhM%_uz<_sMl` zgj@*yj;hX%-!Xg`PUmyg*5#@}?vQMEKwK6RN5xZc2jDU872S<9R6MnyZJv&AwVxo3 z*iml(=}Y2xe?dfPUT>Cgn?mh#+gEvt1D7B33lX%t=Y!ox15w!N&QHPJFhfz;@gZ&d zMe$z?;U}cM`nE{-K4jNo;G@_q!>;VsVALTEt3;#5mWz&o0*B9n@PQH(A^RfEp)f=6 zfnpSOaZ}0>L;UeCrC@$2>U5uNP>hvbVEYVCofiOX*oN{0X zx7yd){DJ8Ix+Dg2H16Oak;~zsm&@@K%NCRXHds!%pB=!d{@^S8lZ~7&?3h0&hQeR$ z*d_;qps4ExA*kz+V3fpQfK333`j5eX4H5*Wmh*L$`x{UDBVFNaL3hn;!N+L+z<6N$ zWi@|I3W|{o1@YHCc)plcmOf@8yzk4p`Oa(1^fZ%DK9_a&2&Fl9X``e;!)N{jjD;xQ zBb+lUk%#RhLJ#?!WFSjn5zn1plcv2dx?}yzs?n=~DZS^{w4QHZ-1~xjK3VYf;0H=F z51JeG;9+fXyt2-8-DTH#438~|y4c)Dk}c%)t6ar%HsIlpGvNjvhwsiu7b4$(+v&hR z#qJ?I8y$-z{QpdV;{q=H5aXGW>#x<0_kr_DR4+tx^#BJIG&| zN!?7WO9BB#wV0S#%*+QS2{+d-Kd6Yecw8+Wp;#W4Yrjc6*&rA%nm|a?&Iuk&MKaQH z%WRk!ydp7MrF_3>K9(jmi&2ROcqidrpc1kSkPKx+fhtQ;uyMGJ>c_F1+;Y#r6#>V6 zG+Q$({$Dc8z-m=LNA?GCbg{cd^UE73Pjt)wlRPQxsz_@6LZa}a+mmgIZA7RiDoz?Al=mw31dCpcC>E1 z9?nunhsI_)V++~GlZF`Wofai61Ey)G=M{IV2AU-+>z2ggosOe2=quR@9Y^!Vlfo_& zCfDU5si<@Y>-oq6o^hc~PRg56yylmgyy?(!<)-2mf|R^K30CSaE~p_Jv*@iaOZfzf z?7U8<4W}}2Lah$-2&UMErwjQJ>C$o#k7vvELzFa>DY}0pV z90WVhBO4bQxHHGJhm;IgP-qD3fd}IZIZ1o@wnn#X#a(31> z_6!Erc1HiM4f=0h6&MwI#A-?QGNK2aL%a$Nd!{*La7v{4#=-3H+b~_ylbV=Ui3_3L zU7ky~map2lq+}j&yH1*hUaHJbhK@*m1+vZ=fyigXG@*xj^%aN!@~ zQQ1n-&D>VOPTL4Zl<0QPzpr+rK*kqBfUR@pkXG=}=TY6OU~@=dRuJ!Gw!z@Rbay*3 zM6zN_GmLZmG?cY7xqRZzZbs|aN`n}T>36UOdL5C^FviNM&PW66aoen=H=dpqXvshp zJV=nCG$S#QdjRX#?g#p6w892wzWnko)k)Z!7IF9d9DowIbni7}U=$;rxvwY;MLMn@ zJ3yLN*vpN#AK+1cN&g@#=m>q$c}Yy}zaS=hzkWF5f<8rmxi_2B`Hycf%5Y8P+`$0= zNFo3L_0L$`%F$Ba$PW1G@25QWrL0Hpu%Qp0)7%iq@ z>4OX(!d6W;*fp_QdFsJT+!w7@0na-~abQBU`r1`ExjAcC^MN)DMjQKr5F0H(s3!8# zN$=^krR_RGd;qKs5!G+H$3-aR@_u_!hnHaAC8+`#EfNg{%;Xvj0ZX*+eMXuwrYI#s z%_9pU&hSvs=4VpYVPmwY;*wM2Voo-`NR+o9ndx;rZ3P*v3k=pZ)o|{0Y7%! z!%MNxnMu&~^wy9QAvHaWG~W4nu1<5I#6fav_lu8h8q!w6*6Lt$1s#Zlw^Ad$H>O8T zKA+8VAouKW%@t9M)D?;y`^+6_#pWB9^>G8*0j9og|7jPy{~%x8}nmcc|HT zC`-!`T!6?y`s~O{DSb8e_*RfnSHv*btS=gUWs;WJ6mxUCrr1=9-r)vRbr9t6wx$2x zKZcJ{yDurjS38eJ=W%4@DqC_!@2brKypttuJG+iF9!cPD6S3j_Wf zyMD?8YRFfe&PRDSr$+6JK6c$|ZI}(7(ZPh1K?~o8nWap{onfLnb9p4RHz}z-a~mSYDWo%x!@`u!y;T5PO+sBMSKH zjJ}K^#v{T$%g|EnUUwslp0V7LWfZ?L%0*$Oy>T|?*+9fYF`4&xrP5AOVF{%^Ic3+W z6fpm^FSuQ(IisnIX?gTEllUMCH7B`1<-K#hfq4nTGLk{UK$ZDDJQ29Etpf;kEn5z2 zw9N<8SFZ+dBCu&&H=Oph=pSeS95GT)h^yAg=tT`b!7@KV%W!ov!2i)%n-^uBQ`z#g{ z)Lpi1d=W{3+Ap@>~J$umKbv1pAikd1Z$V)xR(Fa2dzk0D~T%K#^ zj^)dAEboGzJzt{UMXO~fA0Mu4|1d?wzU^3e{4pJ^ZD!?bdHV;sx9r>Vvq|-L&@*4j z+q_;{Z2Uf|r?mmEIrqrhkO7@gswruG2(Ku0qn6P*tIoz$_HfO&L;{m*kjtqz1R5i2 zf)&pnrZ^e}7CcJLene%vTls%`@cf*_)EI64%B&7egAkC$S62rvZ%!|@cWZ#!nU!@`mH?i^lbrI>Ga}-6Jw6eB9tGNU7%T{F!tgThj)3^A;bj~GgTCFgm z51dn+63}iYaS&TlnH5pet>jf;(YON!Ir>Uog&U}LPffUBvIRJkh3m;e<2{@@@LzZA zCr;O8ySwq`gQ!xvn?AqEjxBm07Y6^5EX@Fkj2*UzZ%<|lxj5I=I(;D9SgMzk$J*7- zFfJDjpSYmXOUaa%2bz{yQsm$tI0LG2SA-SQ_28(cg-k|mp*F!$dqOF22+PLyfQ_Y$ z9zF#nlISOaw-Lp%Hz6;sSTj;*Rcf%NA~8u*4$V4Hu6D5^t4q$#X4_&yiakA2)N{M0 zpBn6!;LanuEpt)cy=MwJJis5ly%?y3d$88+90M?c{D9zDg)2e3))fw8*2S#ldoH1i zL$0JsEo8ucfN%|;+n4dND2iVGxT0@8pYaM!JfuEnqbxJnthvkY(y??WU%6iUo#cp$ zS8wY5(lK!_rj@R#U3~Y4068YF^d9vPW)9ntn>P`7WS3d6gVJBVv%_A&!tidt%Y-CC z^0kJxEqW7;HHB!Xs5F^Im2_?tNXPx+gg>L@m$PB=h9>E zT6vPf!)_wr4jvNvrV*x-SdnHmz&hb9z&f<&5{A!7&rz?*8R3m3wN8=OL52IUOXC?U zLN>%T3Ic$;TEvuVm46A2XclsScgL`i=z%Rd`{c263B%h8Y&-(z87PMT;M(yS?OFp} zyGCFy@~^I)o{i0aJv*Rt@$VyDL0S@+5ow2e`8?B_cJ>f5CMt}AMBTG+gCA>(ClwbR zzVqTQgM%?v_=0y{| zbMh!$b0<_3m9UiIJ1pAm>y~f9JI%;y&P%0jx+~1H0dq7vZ1p585%Vqt-_4^k`$|e= zz&p^~{P*9;`0W+ByokPHunHi(Ts3TnEX2tf5M_|WBlSu)GWxn9qrp19*)v=HqWO-S z^`c09%nnAMcI!Q9+#XDDwNI%GS$@bJ#mB-o#&ntkhh-{#VF`CCh0#Jn{p~XgibGoO zD1DMQK@qD3o>OU04I%V5dNOV-wxIh|t7`LB^7NDTwsozKb-yI^(nK=g?J@cN0D(+M zdO|<3;puzX@%FEknoMkwR!Th}P`P$s24xG+)vKIsg7MMN_O|oq?!2K(J?HWc(bHMI zBpTVf%-y+e&ae(=mLU@M0zU442`LCD9WXuh&%aOj^Go^j^B;b#ASd~E27fQ2{uB7~ zGaZ;h{-vb)ci`X4OMeBn0?U{FuVT~RIsIN?_!lowpmq3 +

AAA

BBB +

CCC
DDD +
1. EEE
2. FFF +
  1. GGG
  2. HHH
  +
+

+ diff --git a/tests/fixtures/nested_lists_different_num_ids.docx b/tests/fixtures/nested_lists_different_num_ids.docx new file mode 100644 index 0000000000000000000000000000000000000000..304c035f671318788c19ea401101af754153130a GIT binary patch literal 29989 zcmeFabyQr-wl~@Yhu{_(v!}JQrO7=J&vF2d;PwAH{ugtg{^PJi?{i$4 z>#&EAiN<-wmS=^1LwWJm6zN54qw~0anLHg(yfzN?C%V}tgMKx#CTN9XsuS+e?TDr%v(7!93Ennzp z2)-I)9x~{AQCW6TG8TeC(UXja>Uj{yCS|=sKs*`NI)%Q5{XGP!w0z?5k%Jh>$CsJ< z^9!oA&jTneji&0aZZz|s9Z~*7%V);kU*)hp=`Hs$#$NzNf8000bz2vGko zk~-MK)CPl3zcsiZV!|cW%-#C62Pem`^e+khUo4jYH1vwZQAI>9+_$H}r?|VTqmAz) zd6uHl+M_CPo-#;t|*wn1j&#cxQEh4Wr){S=6H~;l)wb(_I$nv%*@;A)3_Z zUN7OjDUWSzkt>h@KNJ0QipG88c z5=7azPix*U7UjLy2MW%maSzS{+izyyQ~bDDB||Vsd?cl{(86E&Hs0b6^k^iBr$Ku* z=PjP->$AJoOL7*4+##R)b%UHIoH=x4x*2z6QtSa1s5~15HL6M&g^oAWg60x_+_Y&N1thQ9Yyn=8AK)=mHE1<_MjYAVRGT z%;9Ts;v`qqZNVaWODv*VZQz7}oCp5!CwxL*rbmNk{3E+hYd>MzR4aLIoN!X^*{NFK zd2!oo3#<%V29S+c2O2%VC(3`HMi3`@{yumK0X_hL2cI3kO{0IFK->C8F8Kn4foAqF zsXo79iX^*9ye92r?*hBl^EUS`1nsUAx5R^oTTzrAhRn)Oe5^VSRPO`){h^uTgH()W z9S_f<`x8Jy#WRKzF1*$fWuF+|Bv&P$201w!TWeq%Gknbq{3&y+>GO>KSf&;^)#u|?N$2aGl$FN%(FL!N#(`S>Nj?On0ECcxZq#$XI( zb(IVYrOxAn9swnTpGC`MhYF6MShppb0BYc*DoZr+%@LlL(?Hxe-1`_o+#<)UgqY~l z3qA+g?Zc~TQ!9${?r z+;~(1oCjwSJNd(`;&H-&g0&Q%(~z5t#mb}>_|7)<-yY$Ste2Fm%uBg99!wTp5lo9o zo~tdt^H$2Y3m1w=jj~r7_Rfhbiuzncz|P+&s=y{4ZmBdV809qvle5^AwxzdOsIn)0 z#iKIfT0#9XO_00h;9C&=0A;_2#MnOUn_~^~W9C7&qq_9eX3^27?tQH-epvyV!R8!z zTo`)@_QTGqN4LRj>tP(|H^q zPu>d7V+5P)I)yz0dG6IV8ar;?q>m1M{1lTUcvyNweCDKgxUDaG@~z(3sDn++oJL=d zEtH~A?Tq98*>jThtYQB58G0M|rd2^}T2zB-?2!^5Vn|0EOwDhL{AkwVT>NO<#Y?D7 zk`^RB?0yNOHK`LhJ&U9n#pE9?-0}EAq+CMwox5zi%*idzkqgZxp?pcVfSe>^`Gr3s z>BGf)q%7BLU+!kTsp@-VJpae;o_nE2oA1Ua1g#bg?G*1RG|b8IBnNiOiow2w+k@Gx zKoyIcOxzc;31Sb3QXtV1^6(jX0~DvF2BF7n_u)HM5}!W?)v1W7adUb4Xrmco-v=Md z<&$=!Z)B?eWF`t=5{;~yVk4T$Lvb8+eQfwXW{gc-p(#VLnIb9a2PA7RF^jhP8y^W5 zAeFhEr@?@K^a@jsB5<_%?q6D3cc{k_xF`TXEExbm^mmc>w6k`y=KL+?`lUCH4OX0& z_y{|wzW7jko*FhXz>vH!j2f3J6^wZ7n@lJ1*=us8Lkg0f$h;Hnev7d3B^{x4soxpn z_4xFwBT5T;>xBZ9>ix|}{Xd>W-kfLl#_xTgVm`XcJj!^h)F)6Rv*x&bCi?dMC&sVx zcdJR~3YA9)9OelxB{8S3QzHF*;>2Ocd?NUy-|;?NA&UnGa1vMqYFD4@^GO9pk7>}{ z*?inl|Hh(pgdU7sh5vgR`95s8UI_&n8_YRpDJcHOx7B93*>k`_i3Tps@E3-=D0 z0Wn#+G0!xK*;(XsS9FE7q%Hc3!>1;gIJn=!j`#gK9?lO3n~ogmXs_nUyb11`YK=(g zNmQH$Cl3~%qxIy7>fg-)XuaLG#=eR=SKr$MVk`xx5~EM*recDLW&z1E5vLlq(+>K; z-XKR*akQDi9!GV9$KIeMh5D#m1eWLxY_X?T;dD!#D9Qa-*d1FqvJ$_l$hx}Kgk1G{ zvzFPkhL1dJ-V43%F`u-rS|`_D(Kjg!pt-7Scq&%>nifH*S$+Lc&QbM<;mTz7ml6-7LIjqp=5 zVFSo}Sk*78Ur^M)WY%$aSM1Vd)_D)Tnz?os;Rn-c7noV?EMct{ZCrPfs-@g8YkQGbrSAX`qYTh8E%K*rl{eEj_` ztOSaoVr9iJOrQ^Vnbtq0Caep4{hqDqZSluM8tXB^6+O$(8v2B#ey)wr-QsE5JYy-G zcHXpo8s?2R;mCh%z(a-i+5J=ABlMixePl`OI@>2kZle8M`59wK!0>Kd3tRay@U)={ zdA^@O>KlU=wD0~#HG%;c;UB*)$Nwmd{n(0#pQJj7p``( z_l=A8!GNaJM^LFQs=49WPFFUg;(763FuIGG&o^f`%J@?`8@14V$A^MhuhBT(%gH+v zqh2Jr2I2pNwY#1!jYp&&#Y!GI`g!XPCZ7h;7M>oyEa8 zS0>i$HTe2b!UQGeef5`bV=n|!dwDL)Ib?8d_G?%J3O62md=DL!XJu1~(qBh!2N4MM6iO-ua29up7 z6Nv*jiOXNYNUT3|bI5+MWl;7>J|C}n`4~52xWT#T9U-}Mtc~0#n2urYT)gAk0Vns6 z9iLRq{8Vk}_m0V^y6bRrom@uD@4DdT7wmv!!?NG%PTsg4N5kFVQF0tL|WY@We4Vof&B?qYlG=D<*e*U33pmd-j}8Hwu`Pp z6x0x1!ur7#!&W?*hMW`D+ZEGF0-p4c9Pa(NknUlfik*@2Q0Ij}ffLHyln#DHJ_ zI%oicWGSNsVgSIef4GhTzzs%qcNbR=xB~HVvT%N7|C+0JZ^mD+9E*0RU7~04(sfXaFPxG638^_uKwN41xR~*RlxD0C3Y3o)7m4MA!q6 z{-cg6{QT>H-|e?He>)@QApA#dIH{aRf3J-YorC!I>%aNAP#t#c|iBahj8(C$A@aEfZ{hoYllI-T`z{&IuJk=iAwZY zgp&r_2}_D>KaZ7!YuH8<=o3`{zCqAuYr)wUau38Xz(xls8wOyp1Hb@b;`{MXb{L>HU_<;t`v(FHfWg0Be2)hw)(LTGV}b#` zrvbz_WTAvGK%O|L2?oHr4ut`3v|ghA)yKd1_}6^=|0FY1aM5Yh>xmC*462Xs^Yc>+ zUqxtWihsbv;lK<+f%u|XuC0q>wjRG{7uE0)u4|kj^|ldYQvn3Qemop>puhk_gb;BU zz^Z8j26)Kxl>zuB0|)`?2rrTUCr?ztL5-=)1X@7)GPmoIt+i2xU#UfLb6=*yhYK#5ZwW)FT!9#7s_)FjC z@QMTcO+Y@eF1)9)vk#1r(v1LI`<9}@bte~92oq>+gFQZ&b9V|V=Oq?hB8SsULDko! zXIPchM8eNc){#V<>Gagm&C$I*7g+z%iTL(zOb;HRSkI#0W{q zYo{ra(?6Jdrz7h4p1r7j^R#U8)}h`U-v5vX^r4OFP@%nE3OPkc_*@!OZ}~X(+V}=q z8{Vq|h+m&!ad-zg^bv?MeY5$v;&L{8e9(K2s;)D%K}n5J#4vd2;DHW2@P1MGS|~20 z3SzNiFeJ*hGV^muDNS{*_B!CYFlA^>wjjuC4l2H8d68E0$DY%FJm!D(xMqIuaRE_# z&BD#_p$uEukzy~vL2(^qkVZ8M$op9rehG-&CKU=KUIoM$;{$V2B2_ni+64jFj~cl#RUy0zG0IO@;SK; z6{G$#*MU%>s>+5tXna}b`~2E`c^!jLJ9UxJN0KxNiO)v@ zbvX7v(|NE2I{=_-puL{8#eB7yKw*2s`c$%r@E`{xoJWxFFn~s?abx1%I!UaET!7m; zRrJH-T5(VviO%#zJQfstN{rF~NTLG?;0P-m+EPP1%6eu(JFWu`IGP<(uXk$WXoMur za@sSz;rBevZ@BmZ7njSMy5!Uixsxoo`6`BVAnw?lx=D2$TX`Uql2m34Oj2gO)09|0 zrC5XVH8g{fcbei)UV%Cqz;stejo_yb%#Glk`1er$Jg6LG`gS0@cytpdOpJ2&C$@wC zWB?0OfEZ?#Bf|_!N<*6em!`#fwWHgm;*>01xy4S@Hj2$4Ovv0-$}H~+Z~Z9MCet7m z>%aqARCNVM+Zwjaowz?bh_Bey=`|^=XIXJMn|GOCkp4*b^t|kZ^UF7U> zS+E@FtPf8B$8S}K2a-Vbz?`9NW;sC9sVg6dS4F=_TIWDDq2UpqPaPyx8nhlg^7Y5D zk;0v^#wgmHlb1rh6Su)v*%w;7X%TEs^U^v+QWp%%`Ohy<4*~*Dxetq}=@K9GG5Us* zDfIz6XX`ohVz0ll4vwH9X(K?m*VUG&t}18!`8Q=)W zr;5^RVSwy3#~`1i=W{81Qkj?@AEp0Gg&8t!h zBXwdvU;EatHFK9vT#Sz7n0`vE_GFj>{@+e2bD~Ec5999Hz@Jn zw-*u4nobz*Z2ZyhliHN-1R!c#e07`bOosPm9fCl|D{qJ>cG&tFKCo;}8IuR%lSW{mWOakJHPV*R*@A_3bn5RWlEI&=34B z={1+a&t+_cBX!s$%Y|`KRHt8#oxJNQtK-^`U?KweQ*FdAo(^QOfy9q?ac_Bud)p5jTp!&)@(1-e9Bb+cw^x3Ly6-3RJ#_0~`HM>sq zlWM2>yJ54arrSH7OzG`YRp_kI!r1`~;KBpf*nTSNrdQ_=ZJnGg`xh<|o#YSC{9l5z z`^C|Wpu!;4R5y^+W<+QQ3FJYklj;b&rC~e@%D=uAc`Zf@l8)$VgaMouItuZ&L3J|qgH6Y{bQE?zt%^;YuN+Gd(EOrCo%#4c;eAlez`#3YsCm%120hlOo29uji%`fQ8Tqkkyn)t zLgJBZo`!C<(e2ZzPHEnbO}6dFQx5#b-F**}LG^=SFo3NiCq|}Oui@VNOBjI2cvWR% zg($UTT6^ClZj&iFDSYHY;b>;_*)ud;;o%8Xx1M|LD2uom+z=k-LbMj_ zwEGn-4{8W6E?)y`A|>^u85?r6I+Y%HH?d%EQVAPzS1taH=-iZ_#e5vo};WtS4snPjgP@C@vi;GGIG$DHba& zO&7?0_|e8ax>D)mxH>m^f6~yk3K|cCa<8{z_x6fmgMNl>h+`c%zOc`t1dSyW?;BEP zr1Eo{E;EED6&fw-8&q7U@{@S5x3^gsrAHNi;Hho>vgF&i^b-bHf7dGi2X`Mh-Qr71 z52|LHdiO(Xw{L;EZ^dE1yUBA)v+af31@>g`kBD{CBCfu6v~(uvw1H708$=aPeyV<* zSa9H9Oey#AD&0>r05dlFi^9!GrC7vXDwos;ycK?>vlD2sPt)a;Y1%>g%4Pc$Vl-rE zJt)|eqaHM~Ht?y9FC|T^))LeY14Ond@rUW~rbALZiVe-W%k575ugeRi{5|b$nu9mD zSno_jbm@P`>y!#ia*iO#mADq<0)7?%AdQ!Xi)_#JF<;)6U(L7l%{Ehhh6u!$7h`X zDr@#eEWV>u)Wz1eC`P>c=oUKWxM3SFZX1LE^xL|M0M+&!N&V=wJtcvJi$|_fP1TS3 zN;p;o^>7zM<}@ggCQf&K#cLWoUBK&&*;p{fB&&($o&FStfuj&_|5%i028rG(1(?0STZ|rbGQ6gL z$W@eHW!U>VRcoQ&zQRz+-pQ-Lp~Y!g(%XJ1FTm^U=-wIChq_OdD2tlTyb=Q?QG^3g zhBQvcR<$IY8G#y+JXq%gRmv)P^c7*ssR}qXLL^`_y%_8g9%So+0m+wc1fevrUk{SY zKT!KigRi?_05Tv9&<)=*lm=}ufeT@PSm>KVe?4i)JL}>_ePEM8$iO1SYQ9)G)_s9d zr(>Ox9AAU!&jd|{a{~i`gM2+T6~f@S`1S00x_)1*;+}T8&G70nyR?c~N;-&?dy6ke zGALUMycP{rxHAPBn!WAd&)yg@BnjcIZm?tDwsKM8wLVMl(jh)Qc zcfRGOX3UOY1D3$EmHp$@P6DTnzfF;EcXC&rwIHwE_$@UmAk}72pLmpzUOd!m({DxX zftWGVI?o}C1Z)=3aBtab$_SdyqJoPCX1_1db%Ym%U>gHp>{c{+rHACC2v-i!dExlN zgg~h_T5wHzGvBIl_TVt{rXcgwuUgNqO8u7Qk2+QU)w1@x4i!R#?Q9Y;!X>EAc ztm16FgS3)~w&0f)>R%8VGn>0}aGpXYHcs&!Z}~ox zM6|ua3EjTanN3SFhBTRgw3R9y8`l<0$!GIsL`r;Q+t;P1u$x zAkS2#`Z#LXi|Vasm(4j|>pFry_%R#jN{)U{=0L;wHn6S@Apc4YzTs9 zR1(zGlf6LK&0Oj)G6R*nBRYIQiS9O>fwo+@`o9Lvg#?m9>NXztZi>JF^Qn3dXw#RK zRDD-#1Xx9BVs!fUYfqq@8$tM@1LZCSJ;~*1?j@HE(x-XXt=O3c_;&)*3Qn9d zG~-`1bX}S)zyMtxj1WK2`5pOyuUW`NHL*hn8+b6*Uo{J5mN(W%q7%#?4>c$Uj`!V8 z3#GK?04<;$4V$7KI@5D%LhNm143(pB6HP3iIISgld9>(fwIh91#c^_Ct&3HviO=M^ zpSAoSblU$bH~sHAm;C)eOzXu1W?|V+0!1Nf10C!T4>00O$i$`#s-td8TtqIUxLb_v zM-FH$++U(-Immhbu;n`5vl@RS<^dlx5aCjEm_<@QaVlHR*%N>F>ed4kbX1k z%6v$CyoeR1y)CG{0$a?!z}F}ImAoKec~W_YYmK)%8jh2-{>MiYwzpFLCcX!ZM(bCabT z=j@gsv)Q$|fHA781CaQj8YPgbJI$qQl(jGIXKB%)8If0|i_8RktA}VurdsM_f5-3d z7_y<09aR91Yu>nvJ|gW9F@dtijm^nTTPg`(MVQX+Ox3rOUbZt&xv88R9WVJo-P$Yf zSDo&8JK5ro;3lP(aGznsNpG=CxuKn*N@FR3zqK09{^Dg(Ca6_=hCPtLF^ddb9D8v} zz6_Dl0}sCC86Gx60#=(s96?h3E%M-%rfA+3lCBPMNXSMkpHVd~#Ctr#~pezcoi)&pv_MVN9H0)`#uW`MK!)nxx>`oFt|{+sFlz61B) z`}}{`E%}9NlK(&V3nzcc|DVhMzvTbV<^Ny$|Ig+BU;6*g<^Ny$|Ig+B zU-ti>%m2UZ|38=if7$5Kh>&UOES&as#L zg3j&0p-R!HaO9kT*e~Q<0~|Tm)&WP(ng2r0MWDiwbB%E197!r1Imbo?2hOcS;J~?* zX*h81>=$rOCIk+gdrI{SIJXD~&Y7CQfpcH{e*x!0;J`T}bvSSia~KYsBgTQ_=3M>Z zxVd2mIBxEZ|1aEJ_b=QWHqI~HoD&>3_g4r1;^1F%@c&gv{sxrm4+?9_{sokaR*Mot zXlY9N1(fp(A_e*atrp-wx%}f4c8P?>mKS`p0$$bvEHZGQ+QNU0LZ`^HTrvp9oI}w6 zDIAiAwWp`Ov#rOkxG43yOU_FIxB+J7uuLgVah-YeJz_k_KEmuZq?Az*9vP> z58MIkV^(wh5W?G+tS9+k7&%sV2#z~cgFtSqZH1Z4c;cr*+2+Dl+F6BEp%u2_7NXlO zWMeLW>-?(>@XePwa zu-0T!>Jl!~u!}1V`4tyDO5R$cF3y+u(F9nHWYdy*OkKfhK9V3}`||lZ^8#y6LYC>4 zKgKhK(UCZ-(gY$h3iMgg*rg9Q@17-+tJ}-C$22DPJR52=p@AjZglX$R@M9ww<$&6* z1+KmY&80*<@0LY8H5W&SQSd`8L(E2pb&1U}tBBekuqr}WF{%$}cbcN;e?F4IoL`7m z8U5Ui;-`3Es~@>(Qi;_c>Ff}g7feM2=&=cm*PhR0od?wT6a;C!xPD14Th6YdSmvie zZ5?Rv!Z0Q0If4v^bApccP*Uq&(@PBFZt>4eK?-!n`(al-sLZka{dj3H`7^bp(|IKF zJQS;OrqT~9d!quB4IgjGK99^N& z4eFc@ZuV_{an`B0CF02yl zLjGMyR7HHZ*1I(~pKn)g(f!!bJOY4=X*Rs5@Cnq7|%*aF!idE$YXv`1ButNKk_SsB(<)v1Foe zkhTU+hX)=;H?6-$&bkSt#iVYqnYN>+rUUURiJOwZEDx=j&tOR>KjE<&{t?dNKcmF1 zM}OCWM>_!E!2?7P!fX5%1kK0VLhGLd|Ko&)5IKRS z&WFM|-cQ?-9F9#crv`h{{-0e0Ij*-Jmi^y&@*PnpN|?PL5+ET3#x{WOG~3#%WN~Qo z3;ZkDe4q8B%d?R*@t=3>ztWRXgeJYv6(Leh%~N3})J$~lW+t-)jE|z^YefxzO;zo` zmU?3EDZx<3JzJ2Jv3sMO@fjE!tYXO{k}oCzvj3Ed#}>U`%KNUP;eo7jP(LD)EK(r8 z&MZr7!0gk|3e`7Kii9r;me?Mvb~!-*L>x}rptpB0gelB*&ze=g7zf)Mq&9_tPx>IN zj}7xK{G4ZXC2k?5niL`${mg6?F=4p`BY131G*#`+bshU3+55MYU9}#Bwm1J}R5^=# zb_J%#0D!j_0D$v%hI)9ud2RjMBHEUzSs5Y+dGkL{3F%S0B_?K zsbD&{v?TE?b?y#bLbbSdw@}%Nb>smLP)Iwble@bk z+|I2M&!(7udb-f5*^JUQh=8s8mgm9q@C%Fh4H$3-2;bG>RzH)vn)R|h3R5Z zbm~Ykxy<~CXma>w@sN!b#kMitfcuL3ZAV=$ z#K*68z~iV;*R#+%B{^S*Pp@o5dni9qJifR_x$gD7WG{CT<+y@(B0$Tw(T0{%CHnJy zo2X=keDB`*_#nTZQ|0WN4q~*q+Fq!3Z;A zIyQ^UKz@ayEmBRh6^<2K7DHd}9KJ$g4@-`L4o8eUl7xCO+t>`Gidi;=s!E0{u;MYj zP<6q!bcInGZe0bq;az_WCl;HHzqL8n7AE4Lt+v$kk4V|FTs$m&DLS$yA+ged%3Qoy zqhBAw`2v@J$}l^<_tB%c>y2MAk++T$i2pgLEn=n22^di0ALW|bkJ~w>u^pDNilg$e zTnQLx^r%Wq-OsX6HA*O%z2uYTxG;^bAQR6a=V5WH1WKP7f+LX{H)B)hV@(;;kr(yS zd~FOzzpG9dPEt>(ey%t~J^?n=xci4zB7+}3V4a|~0;C3QSwo*_`%kDPj;=T*_1DE@_ml>TP?z`Yye zm17}RAE>lLRF7ii%g#PUOOmn`PvBi!(hC~87NWknyY!=dQ+Acl_9)>fz^d<>g;pcG z^6TP(papSzAz)GT`f-?@%XlOcJ_YNn1u(-vDDB&sT zt#0xaB;VxF&{1d^V%>w{48`yE)l$P@3`F<#M&u8)<#*K<_M& zPhke-6=Bd3tEOK8G`_6Gd;QKzs>CYU26>0lh;jqli1P9QKRjj!?`dLPThIb5+HK=EY4{w*2b zsuT8WD9=vUfST6-d12P+&txz{qR1$6jaE}lH{m(7xx0)nASb%7f8PMq|2W}UBP6q9 z5qI5a*eLgzZ={ibKKWT3j_-&wkhZM;&a4#Vqn zVCzmx{n8f^Qakh|=<^?wHl-Ccb~ITj+RcXy6^T zu3$#!B+noeZRznI)7SQs4fLwFZ>wpr>f5bE`_Park3qJ zFOLXsMT(DtuoCTnTKg@w1CTIY^@W2tD{y*9@M&P97u_&q(9N|4Ooy^ZA#UON=!_1< znzmYyh2e%d#3PA{OF-cUZ~ry2Es}9#@YUDX$R|>@L@Dh$LSIMv|fn3Qu++0b;NOctnDlGXdu9TfNS;o)3vqZ;8Scj zvpDa^8)(u4ZE1kJscF1v?u9#a%Pv^hjh7CCvncLJ7okIoWcn7d;<`aZTF? z_NbrUpi8anGvz=J`qwFrA_cYOpI(o=TcGAO+0aKGkDyRy`*O_Z?Szawsc~u%pI_ za;h(maq>zWe*3QurNc8Dl9vQuss?nFO!cZkRs$ zd$R4rTV;6Y{-4R^H}r*Q^xty5;JIh5WP0pg&Hp8NV-j1Q;`@&T`irW`e@*^D|8G4Z zccMkhGU8J)FqK8X5nK*f1g3ws@9Kz9b1`#Bdy~`dL8qoE{-X(D5vw|J#%M66=0J72 zEvoWn@k)r(Gi<@Xv&qLF8VK8Ld$2a>H2kSRe>k+9lg-+^_+>TEihGIcKIkrb+rvnY zL+2F-{ugjv>uPODiqiQ=Yh9VPulfZC$t8-4WoC69cU&g8El_#>BCOG&I>L@YpPb=JB-{ZG>g#@vCb2ww! z+L#$N;R@?Djm9-rSJrX7RgETDbaXpc=Z~LCi{P$yp016z2|SS;C&+LLnv+;{UXk7x zIy*e!+pPN8S-e(=r=yl#x$m1_Ia^pALX|FER~gYF&TTLl`LM;poqIRkCRhE_{%j;| zJ!1%yV+X6eNmmJ0#AtV9c71)UGFcs%JriGevAE~W0(ztj70xUbzZz7y&wIa1hFtws zrQ_&{vtRefe8SbW6CJq1*K*N#%?Q$>#m8M8R3(>ukkav48$A*K`0AC%y@bb#jND<1 zh@gP8mT!_uXe`c_iASb-QoY4_MZ#XKMQw9P8$ZHlWw-pxwX4@#45?0efeg1-)thrL z_*U%iF$e#1lg0%V!S#ZdT*EhMc<_Md|A;wg>EdqvA3x;rSCn)3W-BI9LoMVv?%jjHQb7zK+m?Tqv6g*ymZ1O1@%V~$i0L_T?Yvfh`J25h@w>mn=Gxahn4@f~3S!PRSmtn8l zEZ$Cjs{4kKE=9KGu}@nEyG5~S-J)^hntJI}IwiwcZ$9n!y-Vd=4)UFa~Q zo9;LhGZCpueU=%zJ%+>d(om(*+?O7i;r*x^&7=w1Or|BFO1|T4j+qi zf<2jD>V0Tnn`-=l&#Yd%vbu{qgf;n+?VSAzbihk>kk9SQ_Nq$-RCzj5MS?@Xgc@5Uv5F zMd3hy(rvy2k0v;CDJ!H`lO);2{Kd#jH(llSiILG6&_kaQD#(3iO#G%s&VYrBZd%4d zgE912cuRgV+Qle7jj*{^cl4e6ewP_vdd4Z2;hoEBhRv$!`d%N&iV_QCQO%vq@CVx` zHP&G!(*1?|R0iq!v}StR7uFO#z7ZUb8&St@vutQJ))7>5rcCE2)?&B?c4&_?1P_+< zIqbO#a``5RBnm`V7qK#_x>3GX3sd!3>b8=4?^4-iE8We{|2SU(hL&B*_PVLrivA4u z7TA_plNSr`P-^5=NcK&n_sW0r4Li1X-q^<5$}h`TVe9ot3$?OHM=+-XiK1c9mn}XL zwyTi)Y_0%-55Wba+sKKe$hEcWFK&e=W*CI@m1Vc?E#I3zQQv&DL=$ZrIy;Z>^&Iat z-JNaEibgrbQyfp~Hz!Zo*m@QpR3=#ao*hf2Y!k3e5Z6v6oVX+IN0NWBQS=#2bMH$V z$ZiM

4kL@?TUacxWBCwnnF6I6<7$RxYEVyp;wIaY{u-bLObgUuO z<78)L(>;!&Y^rDWxm;P3rgw-+xi_+$19W2(}!n%G;pZEJGocc{H^pYAUG!vhHp ze!9+svm$K-?CDNY7uHdXne z2L2OX+OI5L|EBbpb&XsWpFau6u7{cLwD+Uu8fzO)U@jIkCZWt^Wv;)~F{52#-da1Y zO9D7aq0TI-U`i^)WhRXeiVX^i!k7ho3l^Uduix^vb_Y`^5GjL+i^*(cqmT8>64SjoQ8`j>(#q z<|DzW{=v_ii_o`7q=O3$Ns|{WpD61j6bBDV9akf*;d)}zMb0A8Qut>o6)N;d_?I9d z7`W<|#-|`EAn_QNNphD>P?+2fWXN0Fp`FBs;vhcV;Bej&%b;6+7ZkIVvm;c5nTO>i zZ7pZqLy8wu?eLLMHyJqgx}{`8F7I=B`Rf<2v9ZDwK+nGpJdw)>3p`7}#Xu@pie4hZ zZ`QlXH={Y>4W>x+r3@lZ5h>j>xPIr;sEB78$z>f(#)N!Zg_IzS6a_+;pG08JpgxJm z-M%fP-jG8(LnLnGuDAb~r=|5e&W1TpoVNceN0GUw_?zr|GI;hi;eyutAw~N4r$2HL zXT7dYdL~`P4jwv%{6nahCxc!6P=O|wXzzFTO<&BTJ!{G2Is75qN5-9&5WF;}*g~Jd zkaqIrXntw>-r~9A#e;YMgm+*+AckY^n~k#rVyDI2!#nTxSLic={89vE2D3a;~=Z^NO?7OX*<1)Z$?*0A7lzqfw z65JKH|7W5ib~^X0)&AW}_nSKXMcY~Nvh=K9TR#}Cv@8;Jzb$X6XZrB+=SRG&AAMgL z89QThJk|_wM`O+Z%fLqON#HQM0CJ-@v7lhI%?k`ND3}xMn=h>>us8fweMDh@#u_5!LU#T0eGA0?_%e_$) zxS4Ctx3z`8d8;$*V)vwnIz5d_-P_D-?UZ!=(^u_{y><>R%qx{Rr<<*D=gB@d`CVc_ zremi2%yY*b6)*01zVCHUTlD(*>;)(EU;ol=7iYQdR4th1Xr{T2|HuACYp>sougd9b z{IVn?fg?=ugns4n*e!DG2cNIVu+h3bgYk(p*Ljzmt8$ZKxa=4|K1uf8BUa<|Xjyn* z<`TA3SG_(ob@eMoEEWw)f4cYLMfLlMrR%p8z6g#_Wj%OW%W?aS#=1VX{z>OFR{qG5 zv8+)4!Ez!hnHz7seCiPI-T1OoY65@C9qu%q+I&}QvaVb~Ekmze*vUT& zUdC;DHQQ^#?rn+eqP;~YmTl_nxzW}VXvU_SdA_D4HL~-pmU$u5^Hya^TLHUr^LZ2Y zm6e$PX}_78S@y*sQuoc2bAIW67JA+NGf&RxasFrLA6GYgPB46;-uzcj!YuqM_+J$Mnpsr2bwy_H`KKa_7oLxI`v*KB z9a0l8GU)=ROA)x>0ORf@bnWOjw;;5O@Itkt-{S%?0{6-N2qVIUah~Ik?hf=-5(q;K zf}rj|UNeEN9euhLp4>~z-JrLHKPtBAha?toK8jx0@$zu zx_0!w9m0Te;H3uOI7I2`q3cI)(;)Qk&xYzpY2l#jN39YNx)~S_ +

one

two +

AAA1
BBB1
CCC1

three +

AAA2
BBB2
CCC2

four

five

+ From 97dff054e943f88698e748108b1ec99225fe617f Mon Sep 17 00:00:00 2001 From: Chirica Gheorghe Date: Tue, 7 Feb 2017 08:40:16 +0200 Subject: [PATCH 18/23] Fixed issue with multiple nested separted lists --- pydocx/export/numbering_span.py | 44 +++++++++++++++++- .../nested_lists_different_num_ids.docx | Bin 29989 -> 33169 bytes .../nested_lists_different_num_ids.html | 9 ++++ 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/pydocx/export/numbering_span.py b/pydocx/export/numbering_span.py index b99930af..3f3c8072 100644 --- a/pydocx/export/numbering_span.py +++ b/pydocx/export/numbering_span.py @@ -416,9 +416,8 @@ def handle_start_new_item(self, index, paragraph): else: level_id = int(level.level_id) current_level_id = int(self.current_span.numbering_level.level_id) - if level_id > current_level_id or self.inside_parent_span(paragraph): + if level_id > current_level_id: # Add a new span + item to hold this new level - next_numbering_span = NumberingSpan( numbering_level=level, numbering_definition=num_def, @@ -428,6 +427,7 @@ def handle_start_new_item(self, index, paragraph): next_numbering_item = NumberingItem( numbering_span=next_numbering_span, ) + next_numbering_span.children.append(next_numbering_item) self.current_item.append_child(next_numbering_span) self.current_span = next_numbering_span @@ -462,6 +462,29 @@ def handle_start_new_item(self, index, paragraph): ) self.current_item_index = index self.current_span.append_child(self.current_item) + elif self.inside_parent_span(paragraph): + parent_span = self.find_parent_numbering_span(paragraph) + parent_span_last_item = parent_span.children[-1] + + next_numbering_span = NumberingSpan( + numbering_level=level, + numbering_definition=num_def, + parent=parent_span, + ) + + self.numbering_span_stack.append(next_numbering_span) + next_numbering_item = NumberingItem( + numbering_span=next_numbering_span, + ) + + next_numbering_span.children.append(next_numbering_item) + # add this span to the parent list + # which mean that parent list may be a different list + parent_span_last_item.append_child(next_numbering_span) + + self.current_span = next_numbering_span + self.current_item = next_numbering_item + self.current_item_index = index def find_previous_numbering_span_with_lower_level(self, level_id): previous_span = None @@ -484,6 +507,23 @@ def find_previous_numbering_span_by_num_def(self, paragraph): self.numbering_span_stack.pop() return previous_span + def find_parent_numbering_span(self, paragraph): + previous_span = None + + num_id = paragraph.numbering_definition.abstract_num_id + parent_id = self.child_parent_num_map.get(num_id, None) + if not parent_id: + return previous_span + + while self.numbering_span_stack: + previous_span = self.numbering_span_stack[-1] + if previous_span.numbering_definition.abstract_num_id == parent_id: + # we found the parent span of the paragraph item + break + self.numbering_span_stack.pop() + + return previous_span + def handle_paragraph(self, index, paragraph): level = self.get_numbering_level(paragraph) num_def = None diff --git a/tests/fixtures/nested_lists_different_num_ids.docx b/tests/fixtures/nested_lists_different_num_ids.docx index 304c035f671318788c19ea401101af754153130a..df1fd978bec038a0c8cf033925154d6d433d6d64 100644 GIT binary patch delta 11253 zcmZvCc{r5s+xH+OMD}cxC}o@MS*N~|5VB{T3Nf}R>tMJ;3`t@_5t4lw%9?G+*h+Rr z*=LaKGlanyuix`L@AG@!=XmeqxR2vJ?(6#JKF{+y&(G&`UIi-*$1?x5diN{i*?2 zFJ7C=+I`=591jmGso#e#*8=%&M*hxK4ZA&_>#Kd6t>M20EV8}XZN9VJc(daTyX5a9 z>86@#y6TEQV-FLwyNK+w-O?m>zNxgaYcC8_7`{qKo|&?ak}j_xhA_wn*AO(nL?!9T zdiZ_mY}~k^C7UEon~ziEiyhFuwrqwwTFqaMPv{w7 znDc+;c~qC~FH5b{%ymQP0Zp>tSNCO7X{T)VQVw&+5R(3_x%}mA7vD`|jTQbVvEC+s zzx?6gmWkIoi^Wt*QPPTiSIS&x3#uljxyqIA6fNiYb0Qs1 zG1G_ms`#gWZ@~vXx_gR$;a;)uP1wEM8?5nV!{h9$iJH|WV=lFP?-5}B+48yEeT@3+ zrc`q^r!4$PTfn%Kz>mC#4`Ltc``+!j@M*A1!2Fd9=OW*ADBcyljc@zp z#uAAYkz^9t)#kk)2m$*CJvcV7>1BR$pWWW))lg!>dk#so&~nvA1#6C!yVaH3{OFwr ztG?=mf#Da^mO?z9+jrC@0~>!1?#c5tyz+gtJyN`QNEfR0UW@7YOUkwH7?X_VMtrbY z5H?3Nwv^i}(aMVXPkj}tEf;F~>DR7Y%fZzdtN7kwpiFhG;fp=NZB$rN@E@TNXN+l> z-?GhGxA;@tR7d1<$Dg&$gKPsz>5)&$CvjS@d*kAX*_R#7Dju3i3Ib}1N;4Av#AEXc z^9ETzd1&0&)SqRyi{mjm4~oaPN7(f{Buj-fm-(q`QUfsKN+Qjo77W}oz=s*eoGQL6 zuN`AI%;?z(_%@hf&}nmD)n)km2-V@Fq`L9Jv)Zm%h{{*z!y?M-lDY1?`J585ym8TW zLiedv$xj#{rF3^m1<=>ES$yD}-!F{820*T%@ET=C@;S)db=08dDx>GkEY;icTK_aI zhb;AW5n8VK_CLzelkc(48~vx_CAY3a{N>%xiAO~9v)aU|J%~PC%Ip)bjSM5b z6+?R-C6<0ov6V3h4h*WB>9O?xR~K=Qf3EE5GZGc$9U*19AIYAkgtK z6X>2X(3kU<)vQ%H>)MGirAqU!8Fa&`X466~mJbVBjsQZ-$hc_j!#_abkC+nAf z-bh-{w^Mohe%fK_oS&!ttl!>a`opwT$%d5O^`vygR3L-#P!62fG}3SH-Vp6!*&lT< zd--1-S=;7j0t97D91wMY+YiLnOon^T9CieSrpGmF%{wWvPm0WPPkL$+Fr~evoxy%} zQ=}1}S-@-Csmz0r__xZ-lrVS}(iPeo)qIswivM~H;=nN?1v`&HBI|UvRVep^F=;a{ zB=b7$+}r^71U0N3cea{RM3WRAoA(TDjiZ^D8xRxaKh%;p(LOE|ReD*uhkU0IsMr3p(N}qDZjF(zC?=%u<66W-ooR*zYuo<0C@vX!0 z)oY0OXoP*2FlU}TcWbHMMF^r)Es7Zod(C8dCoB}@Im`II~K`E2L9ms(5mAvMew-UO= z1%{hV#FG_r#ai-%x0cIF*)_x=k~Ou)MbG-{$YpV(DT{CNTIDiY!?Z%^08;#&1x8Us zTOfjsJ^|bEI|lKzZkQf}Mw!}c7>14P;3y z#U+t!DA$ILK~*-u+A-)%w^T-UAxbCTLpCJ%C~@vkvgpsmnVA{DZCUN&)_fl#b6vWk zu?#ja(6_U~ib1)J<9KgUMDS{w_0O7N6v27c*ogDAn-pAP0ZMI_HBk;Hpin?G0ia#> zf$PJ_ymoFcaZyTo9F}&;iwhsNZftEP?u8Sp!qt%Lu#l*w)*(+B$4aY@3u#UMUcRsT zE+1_qw5(sC_`tfLRr%SX6rEL+93=uv?j?b{IG5rcKdpc;Y1x!@!s zJszAW@2kyD;dMUB&(oYqrvS>P31Ul- z0YBcVfyvh{a;+ZhXSWto(}`pC9ne$1rf3S(ynLGF2%R@#NS)4wc6vXHd}YQ=HRuMj z1kA=~7_F4eW1UBw$FxI8oL)rJdg8Rjro|YqU9@S`3WD2XFT)4uEHKG5*_QSH=ZJqI zH9Nu7DnC0b;4as#qwLxw#+73bt2RFu9>(Dn!Age3$k)PR8x5kg4TMZwzFMHJjPD>~ z?JJ`z@{?>S%&8=#gB>|7@56*=s6m|S5tB{T$a*dG>}q3wOmNYPt&w4gR#W|^Q{ZpM zZ)@2lvsO6)bxcgZR&HP@Qi>R|_!&%v0w2T3e`kd#Ya~we5025UD?OxaHf>mii{qzS zrOtb7KGLAw#ZsJ~j79&HQ(4S39lTv#7M^hovKCH{ha~<*+m8oY@|QW(P0lt&U?(<% zTN>>`7?Mdz&&9fb^tY-v2Kw+-S zEA%APt0k{GgNJu~MC?9Cn^N3kebX(I@DUe>y_*e4UM6lOD(?f}H9LRCb{vAoy#KbW zy%rCKt`(4H5NB3#ENDv#HYs|CD_#lZLLNu21@>O|i4aAZV@U6zqEMnDS!W)uGiTfGuq8{p1Tnn>5i3^9Tlt( zD``9nRP7>>f*P2aS zN@!~2tfknEHI+#dW&M=zR_Hq^A|)-{g$*mWn@xp?!D5wvj(Ei)mcSVzMdx1;@OP0Hr zh`t=jA>K0BXJULE7^~lFLDe$l6I8Rn_(mss1BuhtWbm((!h~_3*?j^88_)+at$yyC zc9L%QTGH5!wE&Ow-zj17dGV}rZNi2y{jx1aA1(8dop9Z5n_)!xj)|;Qb?sz9ujK$pj|Tx)gu+!fb3k;VGGZyDPg#`Q?o1CFGH;| zcV)O!Jf{LKsJ`=k!gO+GTBP2`cV|R@+6xh&xPA=s76~EPK;DR* zVWs?amZYerRp+lxY-E)nVfYlPQ{XJmvZV)IkiwF88Ji1le3j|tRp9h3IMAD4Pc(&| zQI`RDZn`zDw4PevS4B)9=F;McIo#CNdz8d>D$t(IM+ROs>jlG!V3CzzVOX?waK2~6 z1@i0*UzHYRe0s*`!GggW6Mwt0o$KYUHDp|Bu2(4fh1AfxO1P1+?YGP8J`QltdVAGI z+oq~<=k!LV5t_z)(iv)#ImH)4?0Y0;VQ>s;n;_&6H~H`!>K+-7v87a7{EXTNHjNa9 z^r@V0CfC29LCbH36!wL{Cx8!ASjd*)YHjW|?kPRLN`NgqLd~nGV#zu82SR)1#TgyTjUgoR%$6A30a|Pg#^utse00B$7I+xy&*b10(kzz7ylM&-^L|YbeQ3uZB2M2c zksTj8Q$WrkVUI!UH_5G5)M&Euq5p}qRE@!By62`5cqosGtQM5}kGK+fIw#3Rjee10 ziPY1lm}GzcKrO>RnOqg;t`7&}KT#jjOo-F2WBvvyHb81tZOh2;!1s0Uy>SO4iA;*+ z*BQPsEKU*meRB65)r9CIOM*R*V4@m#3#Y%TY2~D1+NIkLo=~dc6Yg=k0~Eow-4r{N zcT)HE&&etMVvoAL1NClChb#wFs_MVqxEzEnEek< zgSr!J_mO5q-h749Bf&(G(EF5`n7nU!iNb9$pZZN@5IZ8V&aVgga^lP*knyx!EB_91Ng4(6taJ|P~`3G*>gj243RDe^w!EA7AL2PpKasn^=i98 zJws(vP9|c_r$>xlcVOkplgo@%k~@Xu*uO6eFaENACU*i_m_dC1fnA`7H&8-@C+9yW zs+Bi_g__<$*CsnhQq0!Sk!1V8N>1x&jL=B{BEdj!51ZuXTfeJGH3gszx`47e2^ z#jdVxp=ON#mOg(km7n6Pu$$5tQC3QCj2uKsQ-cdp7pQjR5u#cG7MatL0c}49q3N8| z;Cw2&>!6h;C9XVPGjNI;Y(gH3ttG7kYr8p<5z=Gw0X>zqN3gXtpmoB&d@s1w9o@e* zX1Hzm{hPnqT8+`huCar{#L*n;>pbk2ZHKsG^lmTtE?sK)A=zL(YEO6yr^Hzdw0BZ< zNLo3+R@(s3MKcz-4_qy^SRwiZjLqMnCKJKuAo1E7;lxdW)rpD1!~g=}pkTVBU~Rg# zXk_i1f5R$vrxlR38I#}n6g*37;q+>aJPF`tv)3V%<`@nkp3R&Gq7H21v_(p(TKY{C z=A@`*b7Imt9&QXF(FJJUleO6E;WC+00{NB-(u z)+)i6NWIU>L}WJAO*Ad@RTfS%{0+x^)^G<<tB#thEGN87SR-Gi<4iIR%GIgT6 zvyl+}Q7y?U?1PuEO=K}Obg$!+XQLnSKZ-^ZXj_q81gbbhB2-vmWZY$Uk=Mbxl+ zV+9o5L=sqxI`VZwpV~M|1DF0enD|MB;b*RlG#g;4K`>63L2fsRQwhB0kmYFtyED30 zDUqxorcpez8eU_D%lPt3Yx>vU>e}&ZA!^BF_T9zW;q?n+x+?IInQl@J%knrc944&o z&c8xa89_0S%(zz7gM^FXZ~z-J^RBo~6{Q(^w%PX*fp&?+4v63Uh%C~hh}Y)nldzBG zZHr{WC5MhuTa`Rl-#OV+hPuXEZ#_Gh-wh_Gcfr_@2KM#TWl|YJ06A$+(cY^phG$D- zIxL?pi<4GpcYsL@X zPdV&ar#9K`joXGfPZ1K=gN0^ixWY>?6mgLgI4gSca{Par-#JMsU;z+>&}Ht!gCOL3 zLnn)qI`7%i-YpOv2;EJ^Wp7g-_aDr5L`aZ3-{|s=?gTi%1Xed5*6gi@(`1 zZKLrSzGfrbCm0?+Tqdt)#FKOWNV^I4f`R13SoZ11wSx1y6^&OeGiLEVG z#%F01q3F*4Z__ z*0y2IXhuzlO$qDH5dbUsZtQ*-29C*Lw54-H`5gh2@li%C_VQjgGwy_AR)cGu>Yq0U zhm+H-8a_L~c*MZw>7?|veZ)NU#qNwawJ40-q({!_I0nUo&jcovMU#NojDB<;blTt8 zx>sv*I(=iKA>BQH9UN#xd71VDHeR_zz4fF)S4@H}Ft4xKFbtkhnT!95 zw01D5l@T7Kj^iiC%)Y2lfY!RDXoV|xWVC;KS_t1fIaN>Po2BzsGg4#oNQZI#$wkN{ z%j)QMI{2y1E;cYELIuV_een@9J?T3ef%TbmSTE}2-bs_}ee|{Mr!n*NZ-)BUICDrd zN8}>UB#tNzFJU5kwdI_LBAQwOPO7!GNkPQm-Sqgw3scUH(=#5gn-->t86VP?;jsR0lESl z%BIpJW@h~7nVJfy;Ao3CB*`+j;8Bj>_exfzKdKCoD^-ua4X# zYSy_-C@d{`RxW+@>9_YrHpXGwpTs{oEGU%r2c@09Rx#yzg-2-2u95572i8sgh0(}; zfCx-7k<{aK1tWTAd21np0P51991gmwdy2f-6|U=}il!+Ew30TNk@cMiv61sW7E4|6 zB3{C# z)~CK%1(NkUh}EV2F9+(+TRp(ytIjIaAtIQU_JDeB4j#cpO$XX@erm=x5%;lcx-zKC zysZn zHB$6cLXG2EIWYAKlhd%m6r0+NC_+RgcG-{ki_qu@U^M!a>Q1{QI6foyF6g;i|Rcza9(+ zT5vBl;!0!{_TD^7>q98i(R{(d&`sZNFigG&Tb?FymbB)$xCttDQ)8SY50CuGyI@MyO{KW$2_lkRsu`B#PJnl2aUU|NB z%5#vZ_Tr_?jUroo*e*`fJl z1a*^`2qrPp#4l12LM@9miZ-SgM5}nE zcq;B?S~$R_&2AGCS89Az#giNg|K-bwyQE`~cg_naz>ecX!rB@)J91)XxTt9e)ylX} z9vI{%exeIYY-L_P;A(>=WNT2DwX|1WByP{dWa3L~3w{v$A|ZB3qh zG`E|p9=ns}Pb?2Q*j%GL4KMq(s;O2+88Ia^Un+oWCgIZw5Dv^F3Ptc)t7#I63!LSo zCUokmjF>;!OXp{g;}PzoZ_6?jF*s{)gkwp42;4 z;7Ox89yt9+IR9WK)4e3odC9hg;2S8r$xF?%ZE5(@BHShCs}LCeXxEOWRhYx=Wk#H3 z`|i*_Bam*`>X8zaP0e@> z++6~JGJA*HI7PB;e1s@99cAyflhO}NR7TQVNJ*UE_ZV%^X5)eUwH}w>e*cJh)jt{Z zXT%9?mD5{MdJ|nvAIM=*-7jhyivW9IRsF%Xj-C+$;}=F%OkAu#H=(vR)*oYs`=-l$e9yW^G_GJ> zPr`vSsIq#7kx>6fh#B<%m9@wIC19S28p7_o$gs>0)Z;d{Ex}5zgs{6+}GXzUz6P$o5@!b*M)E}_$sPbz`?$nag z=yfZ6kkuci>+uX;s5oy{BiqA`L#=q}yO}m?Hj6BW$Mo%QA=|skdC|OSFHYvgOB zbISP}x8gOPg-M(rev*oo(A#Q|nRfid15Y_;^w;fA((TV9rRS83QncaLUtTU)T=_hH zTk71Q&bOzB?w1zigG7=GdSqi8D&)^zDx8a+Te)N82Go8&F5_&=v-UeM=a#-LKl@8$ zntQ{phonyu*n4^7s{X6gC`Lt3F{x%{(TKc1ox5T`ef7$%BfqGZnvG{09GgTQr!wjz z;`1ZL4KF8qALMgJFMh*Evd1f(abhEKv`H?BPi!S{dp|KXlJhU-{9B{$7t3;mEN*SJ zd9QBl<0&AGml50*!MW?#ws{V`SOq<=hm&&+ch%v|)*R`oz5 zABxI%we-kj7(4}&&^SSA_?>I~Z)j zu^plfIMSlCeh*E5E538V2(RdV+>71Q``r5ZO^ z4g^Fm`G^*GCvcke+Of2?$DnO!Zdx;em;3iREv6M%3y7`E?7K1{19_4X>b?!7Ks zOzK2q%C|=gmfm{^C5Ec%d<^aIbpA1I=Ag_cbXCXpRNwmn1vG+NulLSP=Z(I%77ePB zvFgQgnTCC5XMW@^JQ>YS9(p1>x0;7@Wo_-G32%#(<33E?;+k5|&D&24>J+?hKREP{ zF*~4T?+Q0JNRXFTtC3v0W5@=?7CcOf#R;lZn9E$!{Nry(^P{L0 zv490-zB$R6qC3Qoo#m0m$%kP+5xmgAr5Ul|;D@#a8y&Fnsetu?mQK`}-rnP)tSg7P zA;%C9=BX`TwTevvCtELMA|ZqU1fo`xTqGF)ujp&s-LtHRXH$X`?L88TV*&#pvK+sE z{&Yxl^s-5mF!VawO|V~g4Hu$5Nk5!E?gw}(``w7|Z@``7PcdgFiE`IGa$Mv6o8~ih z$*)EtD)}SN>klj{_62*vimB(Ye9gs1QA(mUE~(F772mz*`|{;Af(kmtV}Pe!AUIVf z9{5}`;bQnFa*Y9vgLYpMYLAon!XL0$9H)@-@>R-K@e9*u<{Zp#F1+lD6r7Xwx{-SD zSkHD}KW$2d$M>V&)BJ&qqz6vp&N{32nt*4}x`E2+%0|Z;XY(8IA0Dn>u3ku_sMaLw zMuan|9#P)@d@FOI&|+D0>4hapd+~wv!eHcyR$F@nGI6?Ei0zQ77Gg5tPw#Sm41M7D zU^&X8y0*S)zD+zsq?)eelNrT}fZY*R&~p7TpD=qA)h4=XIohOYHRHb8?HxTl$R$t# zo`A6bHs`*DJN~tCeYy2aS~@2^?@JFYM8(0BpEYu(aM{5GI4REmV*-weuw^}SnI3jA zof!o3)59b^J`XrLydvG^J4$+^bHR9jE$G+Jt`E5eCQRlA-=u~E-XL6FbO|?ouj1rr zt&VeLRkl!jedNmY>z|cK_%^x8Ii74uRCY-;8%|;SL<)A z+;E+5h|We2SDKB8Sgt%zj`^LIa8A_7LC6TPi97n;_xPC&FvG#(`T$M6L3}VjsRuu= zPrs3-7sjR|+q@&H)h6M5>qN_RZN7h9?Tz~Nx4km=K!*#gKVu)>p=sau@QE4j&q61J zY|ClUiy0rJw2Ia2JrzE;D{(Q4T@-0^i_Vc9uTOrsILn=bqc<;&x==hq}0#5ge~6W`%GQ{HA+2r*6*hEXzFE>~UEIE6r zS9tIS7&HjDDZb{{jC?va&AXE;}AGG&9dj`?A@7NC2;W>5EC2esG-r%vto- z&Edy>Ob@DVqzQyR-i)tc`12qHs`=BYP)J|ml0uEx7Fm1Fdamum_F3PzM>@of{IRoU zteSCCL1zU89{L1J2eH443ICDe7kg>q9~;do#|jCtrvNkI?w|)@l$Q`dDL>j?Hny9w za>8~^B3xC*6dy*_;_LiQQP5Wmxq-n`ynk)&q8dwW5h_~JY3 z5zs}Uc{+E&Z+GrE1$3R~?7Qw%_T^1N?-yrD<^Fpz6=Rieo<7!7?!P9hK-^I_)qP$2 zSNvT@`Zj<6~wp4D;C(vwR%^}z>>2SyJ^}|#h)8%Y@gz{TgMCR zt7UCW#tL+5Nj&tGMk_a(R3JQf*fR0+(}Ay9;{`yus(!Mt<<3o!Wp{q~=Y}3D8zTuf z{&R1eytHGdqIv$zzwR4&)r6cIT5~gT&UnoSs(9=DgQ5HTkL5pGrGxMlb5L^yH!{E= znE~RCPcCF;ZX0>tmbbXUfz%iytLR9BvK1J`Cx7STdncI-*dPpK4hgcykPefxsfn(M z8XNFzoR=|aV_Io$(R+P3KM8~^JdIovi3~u7z=ZPQd<^7+f98e~-B_C6hr8>?d`&0h zw+~voOpF=0h=+y5JY3=37Jq5?1npRVMv}|1?An<&FaFzbB(}W=JudT-k!&AhE<7_{ zag93R|KVEb-sQKTtZwI>rrL^!cSG~&c3HqpU2%S;guck1?QV;_Zlp8m>$#tI2Y-(L zU33idkc)Y{J9t0LV~ywhxVW&fM4^s~jr=@a3*AMnP`>IN_B*<8`S

$xz^a`Vo&qlHy@qBq4P8qZO;<3C%y6RZ%7cy34)@Em+cw*~@TJt2NJ%nVp ztCoGL%x>3bZx%EoX6#It>4xj~_WaAXUa-Avt~>NM4j%G`M(mFKl%!alqt)xZ5Pv@X zmV000`Nxi@-mAKFlKHAJV4v2vog$;z%JT_yJ7szNONhUF-%lpNOp5F=?0oPstRE;@z8F z(TLK)hHTNaUCMfk`uW4|{$gnr=+(b^=I`ZE!X|-KaS*y|$oH;od@icH z_WA>X^n-PgT0@CP7ZVyDu;+|_y!1l$YBl_Gbn~3sfULTq>~0QZ))w6pVQ>9YpY{1c*m04=Xr$&PX>04qHtRaQ zqgRXZ#hh!`Rtr)j?{P_t{jd{CZuFFhl<@t4qn0ncEQKg)dOiD6eL%?I^YbYF!tA0G zGbPXxow6;{S@iQ`$Xy`qY@?N*>6rRz_Z7)G54Iq-M~Ckbk37BFi2OM)^*JS(ecOBL zr;ohV9~{Po*{=`XyQ5#%8q&D*k;-=dumamDp2BSQNKWM2Gp8SK0^GiR-s@n!4#{*m z#BpgvjkHH{oEOR|6bb5q3hRora0h8{X}JpJ^^<;J#P+D&sZINava_8sj(dW{hKEHY z?;ftS37=DVCMr^C<5$!~)eBWF#LBT$v50A=BVyqAK#b6xaV&Vi9stGNZJ~X-vM*B6 zYd)WAW_J6S0fT=!apvDQVlhR2W{laKm}{JZ7%qQqk$+!N1Azoj+W5a8B1Y3+M)ZF( zu79r|IANao>oD>?!nFF|V03fAFa$`5{x=X~{7-`R@9hl;jBbFW=)YkB2*mU6)e{Tu qhY1hRWxR&Ov;;_r{zodFAi8fSY>)f@*{2wWISRPQq8jn9um1RmefHTWHt<^Z4pEud4FK=REz?B|0@2ZeK(|34 zke9RT8?iUe)*g;FPVS-z$G2-HX3m+i^nOs-l|~nGfDL6ocDwdfyl0ku0}H)j8HS!R=euImesC3w=ztqWWkqBIIat;Hj|@;Ri?34v*J%PD)9|MTyIfe3RWqw*4tb z4^K6bc|hu!$qOAnE~DE1)rp-`|M!F9fpjK4g&|cLcMm93Nz7_MQ%n^_@d`0@YM_ASYOdnEbK+f!wTFM)ia}1jQ^c(@L2sg|gAj5RY+sW| zYt5AlG-jm_F(w~&Z!@jrC*d&4tI6d1v3lJXpeZ>5B_WE{G zAq;p~b7a5mkYMG@Qsc9n)|s%%#SxS)V>94--))<8>`ZQkBEZtfF^C`QzFARY4qyF~ zJkkwm5K+4p_R+pIff( zn;*?E{v)GuZ|1=EIu~pPmaMncdo~Jkao+T6y=g)Sz1QU_G!GufDqj+7V3qP%gC}jD zn36fpSIgfLz6f4_qxSVPP^qV^FDc>f_3Ap9>LOrIE0ekX20G>0F98N$KBdr-abbq> z3^Mr8yIbH-5yQeN+O^+w>e*1JpZL_xxYT>4ebP)4phSVHht;MsLnk-2*!+g-&zUN} zP#Tj_^s*!jWhsL|S<01v=xFHy#7HMJoQYiySKH%Ew9~>-iCwD2qPiO%N0n)m4iuX2 zwlLvjLf$rMpraXwtdQvl?^B%+BU2A0*_*OWYUtXDBPFQ*n4(p&gS<+pushhLBD{Gb z(J{#rUTfF9H|`)~-rjXN22}M1U4iW2ViYORPVnZZ<0}w@`H~)b5s=g!KNb)k8-FyZ zD3ODRJG%mzj34>F+x9Z@2EwfpA6HzCBy4si;(zW9KCn<4q@r%xCnM#5>oq1HZ z4OLv!s@SHek16arv%#UZU)?_jYhT_W7*$Cw@1`==GYqg~gA5(nm4EXG1vNvpF)9vXJn0w1l9gQTod-g5AC z!Dnn-ZY;mjr#IQzbx=k$o1lSEmB??Ri8?FM2aTxXBon zZDMcK!c&PY32`hn+iG}KTL}X{1y?Z&_#`>Si{|F{=a>rbI_E0q=O@diU;b>899k^) zf-g;vUF=u4En!E32$Cz!RGpp5RM=lZXcfwB_ybrfJ9aoWcMHt^Em1}ivcMCJ$~K!b zF)cbxWR+oZ6KQUGZI&FC8}qQDv25PEX8zX|XvM$r3CfB?3ia+j1!_=@o@fje%gPD3 zB-wRdfo{TBKJ79ZC54>SwKTM%44ZJ|q18${`@f#mjm|pA%q0|hdE{>;nc{eBe3a0O z>bjA{ogB`Im$w&XbPx9lxn8x$NPiv#k%ntZG$G`1SG($2%%DhR3yvoJ{>V z^BrkTELpJxU;cnr9>-TPCM;74zy37d?adLnH-B!|b>L!+Nu!2=H~e#VPGsWX&R*8s zEq8G$VLp;YW^;O!X`xMG7)s|(2v_aoLlL914zCfJlhmm{%F!+PO3rq6Ku&~8>Fya} z7>>4!R$9UC{w;H?%=#b3CPaCk4j2^6~I%;*k zZ*9W11lpkaIk8<3QS9a?A0~98Y^kD39=J@_Ot7sWM3mqvnwoP6d{r5dzr#R-^ zR<^OHd*_(CCQT)vH$(;2Sb9qhTWC@{E|yc#6xfr>2+$+U3~?ldjEWMOa94fw73lhm zLsDN&Rbua@6jmnSV;}@)8*BH@Rp+YtHjtg=bDt=!eO#-5U}m9$GN3ldM;UOapn~r+5{;wUdF}yi zh*R`bB#vTC$l}Gh$vA~6TwbbsI85mqlkpN8P9)1ynH#%5uxzp;k&|2k?tGj0a<&^* z1lEN)dSp2?I4-Dr!sau4Jq~v-oXEX6yPh$ma^ANrrXY(`6eZSVjxw}+mKQ8Q#7WE= zU>L)Zu*4dELZ5J;hf54m^u;9S;sEDhz|NThpZTZ;OI9s%+ROUnlGEoA?zHU+#Dcg2 zwO@g-`2d;^mwg3_B)re|F@A*ix5=F|LDZTC_RO&@Whxg^USyfI!Yg&Pq^lvnVqa+g zHZ_&q&NSB7qYsFRUP+s|-|bDA+tGX<6I@zgpHwu(ejl&l+Tbl3k4<|C;FiM)+UF1i z82X__CJj9ZW(t%lt+p4zh>>lv*j9QVGZ9{B{l9^n4KvC_uct)%dly2W0&twEpx@!* z*4|Ppony=YI%_eR>#-5Ns?UMozyMt;Ve-b&1P=^yQU8*WH^nByA(aUS4XM7c z?u77SCsH{8c_PHkZf}Ou5DzxFCLZa?)o|8gH}x?^iSG!f*}WqRI^BwJx#dTg%r`Yt z+dC`;uJR*Qr-luQkXayy`#GEXEOP`?c0Pu{uG@yW44kQBn<8mo4bz&949yZ zU!DNt1naCNymQB9d*bNjNa_Q8y0m{j;VBk@53N>YH*P;^P+x*< z&r#L}JNg(p7MehI52Lb@;@lUL&7T_ zHQ{)cin*oFEb2mZDt6r@o6HMIX?@&!oRHg)~`Fe_T5^ zoJvBO<7+LjuXKyyHOsRQ)~Sq1McIDe66luvw_TBt569&C)QBI*QM&?d(#euUFSr3r z$#^BLU>bgE1joi81e$+jwAuUZ z0{K3QlLW1EhMh~Y@OJPdq2cJA&s*$tpdT~75`C)^qj&|HNi@E^K5<;k(RH#+N12nP zeBT7Ne1{;0256Of3Z1h$;tP{p^Ny<@9b}xc#>K5qze-x4EcK|)?yR{20h>GCc0qo` zh|YG16#k-g*1q+Ik3l*CiBkz~5wfsP<+d=ouE>nL*{Mv1%?%pwNRTeayS=QOCeY=> zTdt2c`ErELAiTgoFYh55F-%e9!BUO;t|Iaz8%ZHu-(E&eR_oRJC1ve9etQx9pt|iC zI(r3bYvIKsvA@q*d%U4Wfk&l`4lTmC-bkNksbo`9kzQ)8IGJdIX(3{y>uf?kq47K7 zHKC;%qvU2dF|98z(!|13JoJ|zC}%PzHN-Cr<@_pzbC(qD#m7}Rze%+48e2iyJVD_e z$)m8Wq^NyqX#k1kTgmN#2BqEcQ^GIzw20u{aKx(kQH(P=+^8WcBpsjIt}OiXJ9atP zM=fUoaGKd^IE{8Mr5%j8q{a4xIOpu7GF6TH$Yl-r7%mD|U4QsFWA%DAHzxH2wn!*m zE-6{E)IPWHp~dIoGN0mYmFeIuOxWd60x3o%48jKl8sj`GG5ik;+2H0EDJ_x&X)I&r z-#k?ln+a#pk1*4~$FH`MBccvFy8@<$dmoQhn>C8<3#~Fn=FzX;`RyjOs0r@hm+jw~ zTOUK>%hMA# z-z%M7;6P^%-7y|3dac`dz8iKR#s?Fn617c$S82?LnmNH5<^?Nv7QwQ+-5K?i9VU+GZkr~Tg+^M9Pu8UX0j z^0eN+EbSR-?P&32w1ATxyZ2twjYNh>nytE2Y7^ZUpTewI1#t_-JRtvwdH< z>L(0*iQ$AU*A6aJ6@^qeDq9~D)1ioEI8dSzR47g^ z?!Oqw{5Le{D^-o^$${s(#3b|}GSKzE>IWA{=ix$L?_WIxf8)RO5H}llcbJo%+rRXX zDx-O)d084?sO44r&)I3r~l2_z)Ye$sc)g zr>pIjv{hDXih1!*=4iNb>*K%)Jq_Fy!Z50Rm4oz>{`tBh8;BcCkIkNtrAZ+DnX1q} zkcqourECwRb|o0lX$?0m3b2@u{?abM%&sOK7HMR^p#rZGyKLOd%FtCj6aDG-i*<0Z zW=lOC`S9JC&HCyUBj+XW2is})F;(h^FCR%M!sb`ryS6gDh>hS9Do+=QPJ&3 z=4M@sU!CX7%~bnY3t9>leyMSbvq;Et5QtT@Qx#tkm=)@XO@*xbQ1ELH4Mx4mk7ZzC zyD_B{IUn=KpFfWEIZV?vq9(3`zrV?X>nhGR=#?>^HZp`)3-Rh*);sU4`h13m{tJrk zFXjdr$!I@X2SSGij2JC%mM}D5QWgaYQIu}qTdxh{{&h|B=FDuk-q6=}GNjJ7oe2<% zu_&hO4s~+y%Lw3L0Cm{J%UiIBon)AAl3SixX*5 zIK>*i>PIzh)~ZsNx2t@u1+?#*Zv~xnkPAf0bl*;j$egUmpU7Z(@{nyQ3i9Z3adSwP zz54SR%h#dDf%eC=6{$Ta7XobsGZnBp={T=9PprS^;QI8;Re?+yl9&47~VdZcCh*x!v0p#pXsD6>(s33c$S9jc znmm>GqCzqyffrJbe)EpnPk6tuV&^~gwy?`TrLDUX{SKx-i2kwo(n~3~ixaR~Xj7)q zQnb5ykMIRt02;MT7x+q^rtS}QD;%&_OBXQ^9IfOf8l-XO|64-6}cZhz^& z?BdmT;u79X70x=y_Qa`CYu9MYy?okYhO*gdX8SQjX2;#Sl+~*tY+A4A*N0>cv?tJE zaF{Wf7$@;Q0OJ?}^SL1VtJT zA`s}b@~88CB0xV-oThhKk3f}m@Vr4zio^sn=;IO=CdsnwfMoS}o>arSI&Wqv9)=W0 zqf1U3uh_1nQ#ryH6h262e7jJD;j?OttWM%}-i^=M{@f8#MitW2&Mt=WEyM)t`*M(5~Tue<$Yz_w)W~ zy+p3rDfuNVH(z1uQz+Z~16NdIIC#>DzejFRnNxj)LUO5de%?^OIV{4YCQjqYNwcKl zdp^|Y$WhV^Ft@$248ea;19s<$tr&v+p!CE@Qbi%e(OuR|kVuCX>-&3P;k&G%W*;{* z?)ASo>hJ9%Q&S~G2-lTgvpN{oSZ{itzy`eH?&n@+`+}E^y8Z_gH%!`*$`DtI&=$sS zR$r<5oEdEm-krYTHIbNFH+$=&RPaS}bO<+V+VuOm)?5DplK!VX zeGnul*Z^^lO!UT`cFT3VbJOJUV|n9plzN*$nHkh5S?}zQnb{%2&4iaACwXYj_`Y1r zR8Zpngyw4l-j90vyhB9c=&(JPDdFqoHX$6-5We60Hp(7LKb{QSL$_*iiK zxYI@7PU%;$r|g>A@)PCY7Tp>t?Rf7vZja16eE?Nt=Zv|n=Nn|IxAyAW{RYmbiY)K4$xs`o97C* z`#3d2RQLP#+xzEs9g7BqYMIp6Q!6&CK8yVM&M>f+ziNLZs29%ZZ;kKK1A-U?+c zv(*8-hLT*nl6umr0pJ`QFQfT#2X`XGtmpAG=C#PYQ6y(g=om?PP|w%zt%bA`h7;Z- zzH{#-*K%gGrCwz|ZXSr(JvnL|mQ8@3Cb|?r+AP9TaCM^F{`!KHWJ&WkxY*|0Kt0^n94R*q8D7M=MzIx)cT94>BP;RV ze)^KPK;D+GZ|`lqNQ#vHFI}VIowsD}*)lP$k+Ad4Xcf7iQ)Rq~x(oWsAGClq8&&~- z^3K0WaD)5ul8Kr&+3?ay=U&Lw-E~wOQhYlpb~&d++ACW$*%_*@gKe4vDHYUTPahr& zl}9S8J|fP)cOy}{Nb44S81>8QIZO?v^>w&cjcc@S6KL6u2sSHCBY)Q?wtFTL!bCEy zj=XKFCM{4xjc?5;o}Ptzzm4)g%Cfh-Zp76m_CD95ik-ZDGtXX8aQ+L?8Y{)fqqmdn zapn}tM7T9;-6lpKbxe-20W6E`$D|H6;8Qf0NhZS!yY=SLQNCp>kJ2iat(Q<6()IyC zwb~hfKOl-MI&PRhAOH5Q9D)x|(g%KZo~NF+W_ThaSw8-wgK`n-`Gc2+;HA3Y|D$qI z5GBI?&KDG^Y}8Nqz746^N!E#Jtg#|zh;;4CsR)ah3I1pxO(p^x@%d-*|FSmek$eP;Ry z^zkJLO3ZJL;-5AxiVn~xqDNT+{CEH9=YT*=e+kV$!og zJwWLiuMtZ8H6u#xgWxqUW0d0uuDkysL)ZSd2Kwj7fHkV%!~MJeBBB1

CCC1

CCC11
CCC12 +
- CCC121
- CCC122
+

three

Date: Tue, 7 Feb 2017 16:20:20 +0200 Subject: [PATCH 19/23] Fixed paragraph text-indent when in span --- pydocx/export/html.py | 6 ++++-- pydocx/export/numbering_span.py | 2 +- tests/export/test_docx.py | 1 + tests/fixtures/paragraph_with_margins.docx | Bin 0 -> 13204 bytes tests/fixtures/paragraph_with_margins.html | 11 +++++++++++ 5 files changed, 17 insertions(+), 3 deletions(-) create mode 100644 tests/fixtures/paragraph_with_margins.docx create mode 100644 tests/fixtures/paragraph_with_margins.html diff --git a/pydocx/export/html.py b/pydocx/export/html.py index 39a69c41..86bb34bc 100644 --- a/pydocx/export/html.py +++ b/pydocx/export/html.py @@ -311,12 +311,13 @@ def export_paragraph_property_justification(self, paragraph, results): def export_paragraph_property_indentation(self, paragraph, results): # TODO these classes should be applied on the paragraph, and not as # inline styles + properties = paragraph.effective_properties style = {} # for numbering properties we add style to span item level - if paragraph.properties.numbering_properties is None: + if properties.numbering_properties is None: indentation_left = properties.to_int('indentation_left') indentation_first_line = properties.to_int('indentation_first_line') else: @@ -349,6 +350,7 @@ def export_paragraph_property_indentation(self, paragraph, results): first_line = convert_twips_to_ems(indentation_first_line) # TODO text-indent doesn't work with inline elements like span style['text-indent'] = '{0:.2f}em'.format(first_line) + style['display'] = 'inline-block' if style: attrs = { @@ -373,7 +375,7 @@ def get_previous_level_paragraph(self, num_id, level_id): return prev_level_paragraphs[-1] if prev_level_id == 0 and not prev_level_paragraphs: - # this is an ege case with older version of word when it may contain a sublist + # this is an edge case with older version of word when it may contain a sublist # into a separate num_id. break diff --git a/pydocx/export/numbering_span.py b/pydocx/export/numbering_span.py index 3f3c8072..c15bfe65 100644 --- a/pydocx/export/numbering_span.py +++ b/pydocx/export/numbering_span.py @@ -234,7 +234,7 @@ def detect_parent_child_map_for_items(self): } parent_child_num_map = { - "1": "4" + "1": ["4"] } So, when we process paragraph item we know from the start that it has a parent or not. diff --git a/tests/export/test_docx.py b/tests/export/test_docx.py index 15f5d077..16fa43d1 100644 --- a/tests/export/test_docx.py +++ b/tests/export/test_docx.py @@ -32,6 +32,7 @@ class ConvertDocxToHtmlTestCase(DocXFixtureTestCaseFactory): 'has_title', 'inline_tags', 'justification', + 'paragraph_with_margins', 'list_in_table', 'lists_with_margins', 'lists_with_styles', diff --git a/tests/fixtures/paragraph_with_margins.docx b/tests/fixtures/paragraph_with_margins.docx new file mode 100644 index 0000000000000000000000000000000000000000..2995cf0c386c76c0b166e7fcb79cbd8cde8af840 GIT binary patch literal 13204 zcmeHu1zQ|hv+&^V?!lcQxDzzEySpBoKtm9_f(y#I#r`23k8i0fCaz<0044;$pvDEDFgse00RJE0pKBZ#T-B` z<{%eCHBU!#XMGkAd%G8T(2#UF07!8E{~rH`-#~rhh)pjmy2MS;Q@~W?qD%{2!RMj8 z7;_@4JBV-&1@SA<^tPvt9UjgCXrhZn{4Z%RxDv~xSGL7QMikY;{(2OcuP&}n-LOSnVB9TY~ zA8?U2ZvwiZQr5|NMr*E(cBYWjOw3um)~1hw1HUkNf$k2A>JgC;3GJ4)*VH4IyW z4yg0R-9hqG8s(FfDZLfP#1!;Ev}v6iQL0q}sT5i(@B1ByIOqic56=ySozasgXSncQ z`wN`i@z1MWyDxQ)4B+!Q0b|Y{Tm$QcjPZO$Qz7EE)LE?8XU~RZ&V)R##Md6@8p6zc zuDz9jK$t@*t_|SQIT~23o}ZxrO8?N!9;5WjQ*es{SoM&=x@qWSZs!bS`HBCllmCYm z@s~$0j~|nP5<&Jm^FL$#vi`vy2IH$8P~|{AU+7ip&?)NaL#%Xzj~ASBjfyN9DQ$xH zEoQCWNpHSEScKwKSDUqna{bUP55C8GrKNjLxRDD_ffOvNn$euTA#=8sXo-aR_vT`m zu)Z$3__GEUw|u_>=`@qauUK%2cV9#->w!G62K6Tud)deb(Wy2&$lve(^7^VGWh65JT;{wMVaF)5CN0h_KyC;)&M01x5r;A95; zA007sFm<&DTd|+!>o0wQ0NXHd@BiCJdBTXoA8WYOLjb4OcK)lNQogkunKjWL0NONx zK(aVN<}YV8;md`2;aVQN3+e0w^B(QD^Wh|ux9fxu+VPNLs!J{06@D=$_un8#6F3^w z=5zco1nn;FTd#;1q;rSdA2+pg@PRp$gc_OmrDDuJCU6{E`85i%hy}K{FUk~`C=Bzz z#<$W#T`*4N#q-)3b0ifL`z+y1*P6zE_~oj-9jPovuM-)O)qiKDwPqxhOb_sH3F;G|i9~dmbrRg|L8b zGe^2_vr96KO^}K*B5W^4)8wqD=1wZCa^#3&@vwws zu9X(oJ>_Gd`MRbdvP6X|GcU?bp4js2KTKkvOufw%%_XZdcJjVObzmG*esi*Q1jrl! z^zF6nts-_X@^QQ+T_#y+)OwrP23ZEWcw=)Rz;;8=u!%JK#<9s*FUJ)&GRGXM`0kaV zttl)eXlw_r9rwkxKO}++o+66vCr1Z6TjN9(310?g5{2YS;Tt1AQb_!wb4}KI$>jEL z#~Rpi&?jsKoW=*9Su0i=uk+Y8Wey~A8_G3VS3la>Y(~DwT|-2r#@EK`&D2keDp_jL zc&bx`{;Ca1p(!v>(@>nb5ZfSAps328FMhh3K1_J_vKCbI{-R(v;`%7`hTkb<+)2n& zb34SZ7C(_d`o-`dy~AtKAhqn0C)oP@+INli;|TV3-U9^8y|)PV86nYlhqm`r_Z@p5 zYM;g2+s{4fiI3(VQf`}1KO=smJKX24e_%~tTs!yycVPS*>1s)#66c|BC0cdt z5c|5fM$^P904T-Jhzi75al7I`hF;4r<*#g35jY!H_e@tsB*0XjQRLA)F|Jd+%0&+H zq!D323T)M}!HcVcUQnmgRg2ha@L{xCLMeU>XF&e!il~05KEK;|H^bHncX!MU){uYe z&^_F-2s$hP5Jd<8;QXaSE>`CD=DI{h1l|B%n-NdZ%5vd84`BJ`o)xkDo-z0wM z?d6BwnEhBX`lIU)N11-IpLq%;Hf&cf1pUHOX+BBauP0neR~$jG7{$F3MVh@y4)t=6 z7JfeF62K%F#)!R!5%%{1Vww1=Rb6UviTOs1E0f<_BC>+XfI4n4a zBB5ckrhv%CEqEk1^_ss&Gwz#GJmdJ61*cZC?-FX`5@zUe%;Fhdgt9s!NN*(UP<=ln z)<;4^9}GG^@alNFJRE2`vZbWBULs~j;Qj6B$QM%^+z?&YUY3s^OBDFOw z{Q@8I>xu?qp&~m92)<^eO~}w);xQwpHyBhN!>l~r)--tQ#JGtg7M))eACe!cgx<4! zjqIU>Mmegb-{Y4LN85ba>pXSSk!T9 z_;5XU0}|jqr&P-~G}U@o0rYTqT=pIb`@VbJ8*q4jxJrT&GIF%(nRpY;!hoB=jpn!9dH%!4kJIFg|vIhi0N5BKv`3h(5apf1$S zN2VsfqQrP|^KsraP17`GE$k96$41sSG30G7Q6%=eo^7ckoH6<=ACa{=$S~5JQu81Y za_$abL{aN3@paz__PzX=IS%a++>LHxDK&mRt0PbJ(TgX=Q@aIz*ju-T*9S2;@zZL| zWIpmtav%=dJqzXGp%VV7M3TZ#2$rbqgf4(Ba2y~syH1!ZK!j~m{?=|!Fmej zj>0{?_8wO}g7=na)+?C`6a`=&@}4uTrJfnhHIgrR3LSJzv15GBHL9{}8XP_8{q!R? zCF9!J`if#cax-4&p*HJ6@~heP7+JOeDgG)4>$lMn?uZY`^&w;m%ZhI>EN3g4;V@je zztcDg87($vH%fSufpl9ahcO?{8E@cO!e2{)aN)iuIKIO?eQtL=TbT$+If@cJvh{Y= zUI1ORbKelqe34^9;M>w!Y0flYi;HT>i<(D6yp+S$?A891D54J=8D3RAIL^kC(#!E3 zp5Cxofj3o^MG%o7zdkvhu-4xSTs}N@N*Hr*;v4%%q~} zj^Wd8_I68T=6TOi0F{*-hU#3WFPlP)C+KSFXSv_cj{>>#+cTOY0 zXkH;cf3dxey$xI&I(~0O%3cTi_GD4_b*Va>xNFsH!-o_smpi@6hq$&%me9>~hL+?B zc;%})rxNIv29*?B2wdcwc06Am_&f{zxd#wWB}_$Em20m^85ex7hAA=E!&LRH3ao^g zCneKB*5E!ttzNs=)CvEY;l*LP4`ZR{heIz%eWIfqVAS5z4G=Ig*k;8|+pgiTSb3!r zAkB=A(%N7!OFA!kQp}znnOD7%(e}No02VGl1G|1;O{Wz@q9Nym@ovqa0*fOfAcy@R zI-q+*y?l4H?2Mr;ec@dAKbjC#H}15bZkP+)FK(E#xr>W6$kO?z`N&ka2fJbDPeRtu z5WAPtZLyOq3>Bhuy0nXh1&9lU&_{U$6>oYR5?4Ec9M(IU*J%-LQ9w4(^q*w3 zcwHNH7Q*&RoFWKR@bdbJ2ODCMt$Bu{24cN!Y?`pkCDXA3F@3&?Ze;WL5WaLJ97+%h z8j+$8!nCy)#jG(%z`QZ!63udUz+=NR?`2&$iOTwnZG*07FI{2Bb=|q-x4KN#+R0gC zI(v5l^&IJTBdReF>O9O2(SS{@fk+yvtovfCw{I#!e4rJ$dD_v(g%b$pYx_Zi^jX*c zTMTq~){OLPW@+-cW0&cs?%^-Ky+PtIFzTZGl*NP15ZbQOi$2(e5y0>c>rqB6#|S>@%Y#U|Ut{_U4r-4g&MrjFVZx2T$@ztw9XjNP zddB7IebyA;Scz_1xpYZo4&SJ>0^>vrA9Ei;rk4@t>N>U%rX)2i6k_GE&`}x|t^zyC zgGB^e$v{>sh-@}bRjp}Pe$kiWUm>%`F;Bc|^C?QRc3f>|aN;w_9nQ!NB}8blqnjfG zs~=uSsJb5;tM*s>;&Gbz+8m@7s99Xee~y!3bf{FjblCOTibwq$ z<0@&m{Gzk*ND#>asgiXI+}S#Q^`r@{uW3XS98;|phfz)6HbjP&ok{fxgj)*c6SSAH zd_WaUfk{kR4Ae7ZZH57+l7z2OUv(EA$?GuxzH;M*hS+4U1t*b2wY)Ysk$}1F+;_Yg z!2UHt=<~Q}{=0!5@5-LE4A{%bD4O%pcEmb<_eS`c7qT0Q+e{BjX916`ZMWd<+JC!Y zHy0Le(BK$%9}L)kxnb_+CaS-vQQ}m$%x6|~*v-~W{>0TLYdjo!dpr@`HjfgaS6m+& zjT7Vs&6-YE_-9=u+RB>(&4iw|s<^=p(CbjMN$$K+q`3DBi$ua)+mH6GZ?OrecfHJ~ z#3&VBnar%O&pLDVRt6;gpfxY|D9r?YPE;w*rxJXxnrI4`h=GzU!Zh5W`w)IFre*CS zLS4r`pP!(I*B8LUeohDz(rx7->EpOL!v>_(!kwBigY>bz(R!PoKPp+_4w>}1>@NAmOxOlp-vBib_gGf*?n?{u~30z zD~t~nWNgbc*qGdTXe|tWFaLgPs^|%plgI#oq$dFIb0hAL0MOaR)6V>lb*Jrvtn&&F z=D?Y5!^Zo9nbYobcDH!KiNqZ$qJ_n1%P)50=m^?c34zHmi&=`D0C<_$^qfVbD(5hX z+IJogtwezA&WG#Ff+zd1WW$HlyXN@-&s*|{3EGHK9pA+b;10}$)~zIqXQkV?d0dUmBNOR0yN1C0 zNV}GWtVURxl*Co-I~PYszM51Mc!}|SioI+zJxhVA$VcC9Rf$B;X7rKT0zs5q!66E5 zT(&*)62cD!C_qyRLt|7$03FsFa#`N0_2T|TLV`p#VFA{6=3fcF;llCgK-B)|ofBh0 z;*wQAD@zwg`P@Y9>%f=6;}6s%U1a_!viRO=ABw6u6NjBsmDj6Bb~G3*-V1j#2@N)SzM{P8Vi`- zSv8$|pWV+^MAlgse%i;|WXu2w#}# zl#uKvKc?x+xuY)o#*HPO|#uT5MLI!sWot0w%mXkL`q{S`&t}LpEhvatnSN4e7n=4=MNl-j7wug ziLE21t?jw%)@@pO5!ENpZE;*KbYR+jD*P1;%W~=#7!KBKam-ksC;Y{;zU|YX8^82x ziP#B~ubt18@N9PZ8@C0X zhg2J2qqO4bNXe@=)%88zQhhE$655BEFje@v=rXDHMPT^IPI5Ct_WD)-wx{T-4|5z9 zeK~RZ+?*&}*bB$Q$i5(AL(ZYWi{1vhft#dYak3+hUGZ{VLOk~Jb6cnYf=&+B)N{t4 z7p#uptp^HO%0qfEu!5nfU%2FMY&!H$=)YNMFIGv|Ueg}f`sVDOQZh^X;q`(k1F2IX z4PYmP$ZMRV#T3SiZqW(jtLosIR@2y`lhE)N>Xst>84apZ&-OQQvU4_Z3p@NS0g^a+ zjHJpQ6&~UcjoVRZHAI~9^H}v&97k!^f@ORG^RcinCy52F?71Xl_T_|U#z%l36yfnq z$SpO{q~2vkbvg>+qfM2&Kf)`tcY1C38RIl2C=hJYhJVD-puF?Zz7S;|A$^_DG02gm zD3D93P$tUNu^0Rh7iXq4_-;HINdvJ?WBC;_+2P8_+!Ts#G-y(PSX|-b%6@xf7)M@) zhPb&`toTcD-M{aWhGxe1msudeLKu#1HID>+CX06?l93Q-i>~0@z(mQwyvEUmA4R{}e zP=?}DX`^xL-rB6kkbxc5wTeioab%w|M(3v~IoBvo63_#OF%-9NxY4XyZ@{&oe?Mrm zpW6-U_1xk}Vp73315uPcWzMEuEHR7c&oQu9G<6?JX-b~RfV zT#Mg|S7Z;^f?qqzjDLGl4-J$JZr;RizIbuc&|{Hf{DiTV`Ru z9PK+`NKH6dc%UsxjpAp`8v&r$wb`NCJ+SU}^`oi&ezDQ%ol=_$G)w>1gB9w76ILjT zyJ9Q5NqCjxe6_G`tSm9pcZiu3!7I3Ey_99X%(|j)q?yMfc=)n)E3CM#G4C5f&dknM zKTQsIiAx6z3HeE0_qUBnn^Z%kRw=Z&RszZ2O{2yYKO1 zZa#`>=WL++Ykv%Wru#!^H^N{HeqvbLUUA}=BII#--_M&=yRUQVaELJ+mk^hCYH{k( zhBRU^<0~XbA;H~eAcP@FiqYSr&SJ2`2t(o&cL~rbk;DA)m0IX4IEGBWEl8Z@9WaDI zGvo(?AqNVFV)8vA#=)x~c$XYeXg(#V{cD5EoL52E|8q+Y=4jEuL!p!_$fA_%Bb6g6 z0|rDsrJoJpq5j}2{*#T;d&JQINF0^F*>Oz_1j8}Z55O?gqamnCA%H;yj^U5ae|Hjv zW>9+Xsq_z?&R>nhb40!LazyWAUj-$C;g{L`Jt!njDjdw;&*1Z9T~&6Ug~Ize`|7)p z1>3`PQpIfc={=m@?75?wE~BvR4=4_@0`Ew^>|{Z%qbOtam$E?|Ma6>C4xuj#4H1311(|1ifS?(8ct;P^4YET zhqOio!GSMIAZYY#LJ^Y*^Lp}w!!Q^*sf-Fx6~}!=V`l+aES%ynvM9>ND!sYLf3yTc zm(NndnE`*HN#HM^jrB_+#H>~k`zKL=I-8D-RF@12f>sGRxs;6`LJDD?e?f4uP|28f zB1(xeBLChe=~UBDqF53!JtsG0C=JDjDmg5W2K^o5jB z1i}K#$$~T%V-OPvn{*E2`2>_6!7~E))3jN&DfwSC1mpxY%u@e>Ts@qwvHbJuqLXH;wm_1L*`-->Bk9I*dhUCLMN$inS-oaNGOxvL*l@63aI{pTCXL?UapP3_cD0-|rFOOn5+Ar^NTxkMQs7&+CZR7q=Q3Aa z%iD!Jf4j!Vyb*~vHquqthVHE2bEo?z8?j`*M>RQR3NN{k9XDg0!?5zGF-m>6a#ytY z3D5I>twpP~uQcdxBPglrR&A}Ns}t8Nr74}AovcVVw7pDsW%7E*rSM}e$GNCA){176 zXMRnUk%3^exj|PObK<_>SYd~)CMc%S_tSON-Y1kZo5Qmb_X4q^qH&t~w?3s|LWVFK zD=xKV!KFfV_2(E@1L|3HT?vmm2C6e((zN$83sMB+9kiV5_3qH-6jqJ}1z2N59Zs@2 zf!W`e@o2WZAeNs$408zIKjz;O2eo5$6ZP6@}mxjim> z(Kj+Kf*e|sOSU_{mcw5tRvNn6HXRjpS+aR94a&u20v+a}3kAo-Hu&hSMu?i9rt_!5 z$J9TSw2-9b2gz_UeDuH!Tc5#heOfFaQRNnLvu-?5KoV5Xs9rB z*K~hIEAa3(ceqti0WNcm;h2-PqoyZnT_lNyZ_c?TnB2zE+xc0Epp~`|u>vhx-OSq7f z6BL!kpdMr>zObW=r{h(sc_8ZBN}J^61(ENtomuFs#l{^KOOohKb(PHWE1koJ&)|)f ze+NHb!^P;dz+q7+xW)hjoJ0jDzLcCC9G!ut4o>EOR{#7ra|(_Mz2o(5dswl9&*1LS zsBi3582My$jZxuDdyG&APDP#SjmO}m_Xau*bs88F^b7ZS71y29O==V5>miN;HK=8< z{3(>=oO9=?@3cm@;jnoXaf+fel=1|5I7&{>5G|YvncKAOyWT>uAkyqQS;(k)IXFd< zl!>UMK#K-#%jin`@GNt3CHl;8ZbFb#GIgse2=-O5ags_0Kk#~0GNNp;viRh6LTl0EGfnv4M=WfOi&n<12QPTZTZK#<3Q#~GHY3H|F3x+}@ z;FN47I#x6m29)I`6bhc?XWkDP7PyjhD7E(-s02fU!5bk|oI@5^A%gi~XidqtMvpRrS58yl~pC&TJ_o9n*x_gR_Z$4Gz` zFzAyW-7sUSLaZ~y=MUbOh-_s*)ccJm=@XV7?jW!aC9}N+Rb4;NaRewt( z7ABRt$rNra=#V0b)nAM}jkM|~sC1y$bfCz@6IzJML-pt?L??eSdQU0JY9wJ6YV$c3 zdwGJ1-5PgetG2{ij>Y8)QfmO>;JT&nHXx3q;$z=}gje@Qsn!M4RH!@MQ`!4U-l%1!avH*2^O-?fL!yo*jSner@!NPWP|*S10D}tbR`2+HJUvKCz*s z6T$PxL+o%*o9nn%D&rT4oU>va;rLY9b#~vEK2KC1%TEUH zuYD(@$-tLYSYpoFf=ZIb-mGY0utSrwth=y~sG~}?oM!LWE383)_S`DER zw1kqza(Z>lxJ_IG(vi)Wcd9U0207+t&p!reF=X( zFq`qQ9rAj#)d(gthLJ*(sQdf@34IlfkwURERBzvE0gZv8>SL%`3_7W3`*GapyTH73 zo2pPa;L`j|bY2VczFpkyoz&A57g^vJckCq`DM1PD8DL9^SN)YD_J^gGZ1cqR5q?@5 zgZ0xGz-S>*dMn7OHi8ocfW zrnaTPB{b8d{%YG>WHM;=H!ctibzHfev5v2;pM4wOkwHzpd5ck?O*BP%H0UHLl6Y7P zcOLmefEqTK{2J6_yzoRIfTVHjwnbmZmBkNhP((A?pJ3ArcSiB@+h56}6xh7Cpkrz*PO-r-w0`bCGt`%MIGiQqoJ6LtVim{=p_SchS+*__i zhnksKZPUwND%xKwQF3q1&7?Hkz)ydnY4d$*arD>J$!G)L^X<~Kp#wVMw9+y@qda5O zkJ!iNuDDy!J0mq;lZi~M!Y-v>k?0PuidH_pp5$o~nfETU`4N-j1qwL6^9e~|Yl^jf zX4ipdLTHfb(YKQtZ?YgDEXCO6$aFQEPBghlZNlj0k& z)!at-WvQ|NSI?>$o7nwfI%ktMK;XiW{xjMW5~ht59&&qnn_@cV<@`!KMlZks&*#!- z@kaWc6H9)C91*@$@doPfL~pka;^%GW@sl;h?rx&FVEVM~Pa!XI;){6`B9Na_<$7o8fUV@Ta3g0TacdcQE`d`IvEd;3%?yq*c z#{n#1U!(X`6Us2Hc10rCbg}EdKa(*cpi$Fe5Hsc8N4Z4K`}{$(IEF>}u=4X-0qX^p zbXY^~dU;l;O>>w3xog>WfqH|%OW9!!-`@1w#Y6I5T#%8qQ)2h)KqWTc%pRREcAhuG zS1+Q7sL!(z2jo9~=SI9hL=f6~nFULR>CNT5%Kee>GifZs9@9cO)MpOBH_Jm-s(rZk z@oynGLuV0oh9sB#t)|+bw)e!*6`6|o@?kx4({bLz?_DoNHP$?Lc%gRV69?utUGiJI zNL=UOWCY+bp`@COJIfiMJyk*tAavT&ik2M90l(VmPd$e2*LTT+HY7@9EV}V%_OCkl zg10+cQ$P4@`F1~d<~V4vu%1bI7@m9YUaC*fdOM8=-XO!mUo|0glB+VV1Ue+020DcI zoFfRk8M_*Paz}Y#&tO>WyI<)wiIM|ziGczTsFASdUwO5NL^cE4FSKpeM0Up& zn{#yExrp-)1T_{#@CX*ef4g>N@2w*3z^+{rxE1|Z*Us3{@xPuO*tz)g$W)P+1!qLs zk)A@PTQg4Y-VfU0YGa@=^k@i%m@UK3P*HvS^t@g&r*m$hzOb4NHm=Re4J}Y z`!G_WAiKO&I|WFUQFKNtN>ils`art8^iXb-KbPAodYeisIvmg8yP-1H_f4Si-2LEN zOq=76s(}~29$#^3l+a8>b5Zm4?7b2hgNtBE)grW6L?U4XLd|4vG+(36uaNlA#v%m4}5uiWc?80+u>OcSe z;ICiguk(NSGlG)rKNE8+eSv~qIVJoHk+t`a7rJ3kd(_ z1qn6{|0*Z^o&5LOslUl3!AYq<$bYY-`klq^x%0nSgn^yt|H)_lk^=ml{P(os-{fQ9 zVy-{Pe@i0%&f)hI+TR?Y@cx>>|0dLaC;n#$`Zp=~iV6VuS3vqZ`R^ga-wa}C{v7)s z0mSe0zuWP@>G>G{r2mIi|DFDy%k1CO0Kf|KFVp{X(N&U#0gD#^fDHaXfF;z2_2=FH E19`W5;{X5v literal 0 HcmV?d00001 diff --git a/tests/fixtures/paragraph_with_margins.html b/tests/fixtures/paragraph_with_margins.html new file mode 100644 index 00000000..1b1098c5 --- /dev/null +++ b/tests/fixtures/paragraph_with_margins.html @@ -0,0 +1,11 @@ +

Heading1

+ Heading2 +

+ + + + Heading3 + +

From f74c638c1ae0015dbcef1b004a2024abfc9d92a4 Mon Sep 17 00:00:00 2001 From: tomnor Date: Tue, 7 Feb 2017 22:38:53 +0100 Subject: [PATCH 20/23] the LICENSE is Apache so align setup.py with it --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 9a17e299..74b40964 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ def main(): author_email="jason.louard.ward@gmail.com, samson91787@gmail.com", url="http://github.com/CenterForOpenScience/pydocx", platforms=["any"], - license="BSD", + license="Apache", packages=find_packages(), package_data={ 'pydocx': [ @@ -64,7 +64,7 @@ def main(): "Programming Language :: Python :: 3.4", "Programming Language :: Python :: Implementation :: PyPy", "Intended Audience :: Developers", - "License :: OSI Approved :: BSD License", + "License :: OSI Approved :: Apache Software License" "Operating System :: OS Independent", "Topic :: Text Processing :: Markup :: HTML", "Topic :: Text Processing :: Markup :: XML", From 84ab79f084899217b6fbd032f6b1bd1a95cfc965 Mon Sep 17 00:00:00 2001 From: Chirica Gheorghe Date: Tue, 14 Feb 2017 21:08:33 +0200 Subject: [PATCH 21/23] Fixed review remarks --- pydocx/export/html.py | 48 ++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/pydocx/export/html.py b/pydocx/export/html.py index 86bb34bc..e03564df 100644 --- a/pydocx/export/html.py +++ b/pydocx/export/html.py @@ -316,11 +316,8 @@ def export_paragraph_property_indentation(self, paragraph, results): style = {} - # for numbering properties we add style to span item level - if properties.numbering_properties is None: - indentation_left = properties.to_int('indentation_left') - indentation_first_line = properties.to_int('indentation_first_line') - else: + # Numbering properties can define a text indentation on a paragraph + if properties.numbering_properties: indentation_left = None indentation_first_line = None @@ -335,6 +332,9 @@ def export_paragraph_property_indentation(self, paragraph, results): if 'text-indent' in listing_style and listing_style['text-indent'] != '0.00em': style['text-indent'] = listing_style['text-indent'] style['display'] = 'inline-block' + else: + indentation_left = properties.to_int('indentation_left') + indentation_first_line = properties.to_int('indentation_first_line') indentation_right = properties.to_int('indentation_right') @@ -348,7 +348,6 @@ def export_paragraph_property_indentation(self, paragraph, results): if indentation_first_line: first_line = convert_twips_to_ems(indentation_first_line) - # TODO text-indent doesn't work with inline elements like span style['text-indent'] = '{0:.2f}em'.format(first_line) style['display'] = 'inline-block' @@ -375,7 +374,7 @@ def get_previous_level_paragraph(self, num_id, level_id): return prev_level_paragraphs[-1] if prev_level_id == 0 and not prev_level_paragraphs: - # this is an edge case with older version of word when it may contain a sublist + # This is an edge case with older version of word when it may contain a sublist # into a separate num_id. break @@ -383,8 +382,12 @@ def get_previous_level_paragraph(self, num_id, level_id): return None - def export_listing_paragraph_property_indentation(self, paragraph, level_properties, - include_text_indent=False): + def export_listing_paragraph_property_indentation( + self, + paragraph, + level_properties, + include_text_indent=False + ): style = {} if not level_properties or not paragraph.has_numbering_properties: @@ -407,48 +410,47 @@ def export_listing_paragraph_property_indentation(self, paragraph, level_propert left = paragraph_ind_left or level_ind_left hanging = paragraph_ind_hanging or level_ind_hanging - - # at this point we have no info about indentation, so we keep the default one + # At this point we have no info about indentation, so we keep the default one if not left and not hanging: return style if num_id not in self.numbering_level_listing_track: - # by default there are only 9 numbering levels in docx(0 indexed) + # By default there are only 9 numbering levels in docx(0 indexed) self.numbering_level_listing_track[num_id] = [[] for _ in range(10)] if paragraph not in self.numbering_level_listing_track[num_id][level_id]: self.numbering_level_listing_track[num_id][level_id].append(paragraph) - # by default left contains hanging as well, so we remove it + # By default left contains hanging as well, so we remove it left -= hanging if level_id == 0: - # because html ul/ol/li elements have there default indentations - # we remove the default word one as well - # this way we will have as near as possible migration to html + # Because html ul/ol/li elements have their default indentations + # We remove the default word one as well, + # This way we will have as near as possible migration to html left -= (default_level_indentation['left'] - level_ind_hanging) - # first line are added left margins + # First line are added to left margins if paragraph_ind_first_line: left += paragraph_ind_first_line if level_id > 0: - # for nested levels we need to add indentation based on parent level + # For nested levels we need to add indentation based on parent level prev_paragraph = self.get_previous_level_paragraph(num_id, level_id) if prev_paragraph: - prev_left_level_indentation = prev_paragraph.get_numbering_level().\ - paragraph_properties.to_int('indentation_left') + prev_left_level_indentation = prev_paragraph.get_numbering_level( + ).paragraph_properties.to_int('indentation_left') left -= (prev_left_level_indentation - level_ind_hanging) else: - # there are edge cases when we have a level > 0 for specific num_id but no + # There are edge cases when we have a level > 0 for specific num_id but no # actual level=0 for this num_id. in such cases we just do the default # indentation left -= level_ind_hanging - # because lists add there own nested level indentation we subtract it here + # Because lists add there own nested level indentation we subtract it here # and the remaining part will be the actual needed indentation left -= default_level_indentation['level_indentation_step'] - # here we well, we remove the default hanging which word adds + # Here as well, we remove the default hanging which word adds # because

tag will provide it's own hanging -= level_ind_hanging From a7b31cc953c4a1aaf1740977f30b3bbf4ca14c0d Mon Sep 17 00:00:00 2001 From: Chirica Gheorghe Date: Thu, 16 Feb 2017 17:44:51 +0200 Subject: [PATCH 22/23] Refactor nested separate lists to include more scenarios --- pydocx/export/numbering_span.py | 320 ++++++++++++++++--------- tests/export/test_numbering_span.py | 346 +++++++++++++++++++++++++++- 2 files changed, 550 insertions(+), 116 deletions(-) diff --git a/pydocx/export/numbering_span.py b/pydocx/export/numbering_span.py index c15bfe65..dade5985 100644 --- a/pydocx/export/numbering_span.py +++ b/pydocx/export/numbering_span.py @@ -138,6 +138,8 @@ class NumberingSpan(object): def __init__(self, numbering_level, numbering_definition, parent): self.children = [] + # Mark a separate nested list + self.is_separate_list = False self.numbering_level = numbering_level self.numbering_definition = numbering_definition self.parent = parent @@ -154,6 +156,16 @@ def get_first_child_of_first_item(self): return return first_item.children[0] + def get_last_child(self): + if not self.children: + return + last_item = self.children[-1] + + return last_item + + def get_numbering_level(self): + return self.numbering_level + class NumberingItem(object): ''' @@ -198,6 +210,7 @@ def __init__(self, components=None): self.candidate_numbering_items = [] self.child_parent_num_map = {} self.parent_child_num_map = {} + self.list_start_stop_index = {} self.detect_parent_child_map_for_items() @@ -208,6 +221,17 @@ def get_numbering_level(self, paragraph): return None return level + def _get_component_item(self, component, to_tuple=False): + item = { + 'num_id': component.numbering_definition.abstract_num_id, + 'level': component.get_numbering_level().level_id + } + + if to_tuple: + item = (item['num_id'], item['level']) + + return item + def detect_parent_child_map_for_items(self): """ There are cases when we have span inside an item and this span is different from @@ -225,85 +249,111 @@ def detect_parent_child_map_for_items(self): continue numbering(in this case '3. C'). We parse this as following: - let say that list: A, B, C has abstract_num_id = 1 - and list: B1, B2 has abstract_num_id = 4 + let say that list: A, B, C has abstract_num_id = 1, level = 0 + and list: B1, B2 has abstract_num_id = 4, level = 0 As output we will construct 2 dicts as follow: child_parent_num_map = { - "4": "1" + "4": {"num_id": '1', "level": '0'} } parent_child_num_map = { - "1": ["4"] + ("1", "0"): [{"num_id": '4', "level": '0'}] } So, when we process paragraph item we know from the start that it has a parent or not. """ if not self.components: - return + return False parent_child_map = {} child_parent_map = {} + list_start_stop_index = {} + # we are interested only in components that are part of the listing components = [component for component in self.components if hasattr(component, 'properties') and hasattr(component.properties, 'numbering_properties') - and component.numbering_definition] + and component.numbering_definition + and component.get_numbering_level()] if not components: - return + return False components_reversed = list(reversed(components)) for i, component in enumerate(components): - parent_num_id = component.numbering_definition.abstract_num_id + parent_item = self._get_component_item(component) + nums = [] outer_item_found = False for j, next_component in enumerate(components_reversed[:-1]): - next_num_id = next_component.numbering_definition.abstract_num_id - if parent_num_id == next_num_id and parent_num_id not in parent_child_map: + next_item = self._get_component_item(next_component) + if parent_item == next_item: outer_item_found = True + if not parent_item['num_id'] in list_start_stop_index: + # We need to find the index of the component from original + # self.components list so that we take into account all additional + # paragraphs that a list can contain + list_start_stop_index[parent_item['num_id']] = { + 'start': self.components.index(component), + 'stop': self.components.index(next_component) + } break if outer_item_found: for _component in components[i + 1:-j - 1]: - child_num_id = _component.numbering_definition.abstract_num_id - if child_num_id != parent_num_id: - nums.append(child_num_id) + child_item = self._get_component_item(_component) + if child_item['num_id'] != parent_item['num_id']: + nums.append(child_item) if nums: - parent_child_map[parent_num_id] = nums + # parent_key = parent_item['num_id'] + parent_key = (parent_item['num_id'], parent_item['level']) + if parent_key not in parent_child_map: + parent_child_map[parent_key] = [] - # save also the child parent map so that we can easily check if child has parents - for parent, children in parent_child_map.items(): - for child in children: - child_parent_map[child] = parent + for num in nums: + child_parent_map[num['num_id']] = parent_item + if num not in parent_child_map[parent_key]: + parent_child_map[parent_key].append(num) self.child_parent_num_map = child_parent_map self.parent_child_num_map = parent_child_map + self.list_start_stop_index = list_start_stop_index + + return True + + def has_parent_list(self, paragraph): + ''' + Check if current paragraph is inside a list which is separated from parent list. + ''' - def inside_parent_span(self, paragraph): if not paragraph.has_numbering_properties or not paragraph.has_numbering_definition: return False - paragraph_num_id = paragraph.numbering_definition.abstract_num_id - if not self.current_span: return False - return bool(self.child_parent_num_map.get(paragraph_num_id, None)) + num_item = self._get_component_item(paragraph) + + return bool(self.child_parent_num_map.get(num_item['num_id'], None)) def is_parent_of_current_span(self, paragraph): + ''' + + :param paragraph: + :return: + ''' if not paragraph.has_numbering_properties or not paragraph.has_numbering_definition: return False - paragraph_num_id = paragraph.numbering_definition.abstract_num_id - if not self.current_span: return True - current_span_num_id = self.current_span.numbering_definition.abstract_num_id + num_item = self._get_component_item(paragraph, to_tuple=True) + span_item = self._get_component_item(self.current_span) - return current_span_num_id in self.parent_child_num_map.get(paragraph_num_id, []) + return span_item in self.parent_child_num_map.get(num_item, []) def include_candidate_items_in_current_item(self, new_item_index): ''' @@ -323,7 +373,7 @@ def include_candidate_items_in_current_item(self, new_item_index): # Since we've processed all of the candidate numbering items, reset it self.candidate_numbering_items = [] - def should_start_new_span(self, paragraph): + def should_start_new_span(self, index, paragraph): ''' If there's not a current span, and the paragraph is a heading style, do not start a new span. @@ -337,37 +387,124 @@ def should_start_new_span(self, paragraph): if self.current_span is None: return True + level = self.get_numbering_level(paragraph) num_def = None if level: num_def = level.parent - if self.inside_parent_span(paragraph): + if num_def == self.current_span.numbering_definition: + return False + elif self.has_parent_list(paragraph): return False elif self.is_parent_of_current_span(paragraph): return False + elif self.current_span.is_separate_list: + return False + + list_idx = self.list_start_stop_index.get(num_def.abstract_num_id) + if list_idx and list_idx['start'] == index: + return True return num_def != self.current_span.numbering_definition - def should_start_new_item(self, paragraph): + def should_start_new_item(self, index, paragraph): ''' If there is not a current span, do not start a new item. If the paragraph is a heading style, do not start a new item. - Otherwise, only start a new item if the numbering definition of the - paragraph matches the numbering definition of the current span. + Start new item if: + Paragraph is from separate list and inside a span + Paragraph is from separate list and is parent of the current span + Paragraph level id is bigger then 0 which mean we are still inside list + Numbering definition of the paragraph matches the numbering definition of the + current span. ''' + if self.current_span is None: return False + level = self.get_numbering_level(paragraph) num_def = None if level: num_def = level.parent - if self.inside_parent_span(paragraph) or self.is_parent_of_current_span(paragraph): + if self.has_parent_list(paragraph): return True + elif self.is_parent_of_current_span(paragraph): + return True + else: + list_idx = self.list_start_stop_index.get(num_def.abstract_num_id) + # For mangled lists we need to make sure that we are not handling + # the first element from the list which have level > 0 + if list_idx and index > list_idx['start']: + # We are still in the list + if int(level.level_id) > 0: + return True return num_def == self.current_span.numbering_definition + def add_item_to_span(self, index, current_span=None): + ''' + Add a new item to the current span or the span we specify. + ''' + + self.current_span = current_span or self.current_span + + self.current_item = NumberingItem( + numbering_span=self.current_span, + ) + self.current_item_index = index + self.current_span.append_child(self.current_item) + + def add_new_span_and_item(self, index, level, parent_span=None): + parent_span = parent_span or self.current_span + + num_def = level.parent + + next_numbering_span = NumberingSpan( + numbering_level=level, + numbering_definition=num_def, + parent=parent_span, + ) + + self.numbering_span_stack.append(next_numbering_span) + next_numbering_item = NumberingItem( + numbering_span=next_numbering_span, + ) + + next_numbering_span.append_child(next_numbering_item) + self.current_item.append_child(next_numbering_span) + self.current_span = next_numbering_span + self.current_item = next_numbering_item + self.current_item_index = index + + def add_new_span_and_item_lower_level(self, index, level, previous_span=None): + num_def = level.parent + + level_id = int(level.level_id) + + if not previous_span: + # we need to "subtract" a level. To do that, find the level + # that we're going back to, which may not even exist + previous_span = self.find_previous_numbering_span_with_lower_level(level_id) + + if self.numbering_span_stack: + assert previous_span + self.current_span = previous_span + else: + # If the numbering_span_stack is empty now, it means + # we're handling a mangled level case + # For that scenario, create a new span + self.current_span = NumberingSpan( + numbering_level=level, + numbering_definition=num_def, + parent=self.current_span, + ) + self.numbering_span_stack = [self.current_span] + yield self.current_span + + self.add_item_to_span(index) + def handle_start_new_span(self, index, paragraph): level = self.get_numbering_level(paragraph) num_def = level.parent @@ -389,11 +526,7 @@ def handle_start_new_span(self, index, paragraph): self.numbering_span_stack = [self.current_span] - self.current_item = NumberingItem( - numbering_span=self.current_span, - ) - self.current_item_index = index - self.current_span.append_child(self.current_item) + self.add_item_to_span(index) def handle_start_new_item(self, index, paragraph): level = self.get_numbering_level(paragraph) @@ -408,83 +541,37 @@ def handle_start_new_item(self, index, paragraph): if level == self.current_span.numbering_level: # The level hasn't changed - self.current_item = NumberingItem( - numbering_span=self.current_span, - ) - self.current_item_index = index - self.current_span.append_child(self.current_item) + self.add_item_to_span(index) else: + has_parent_list = self.has_parent_list(paragraph) + is_parent_of_current_span = self.is_parent_of_current_span(paragraph) + level_id = int(level.level_id) current_level_id = int(self.current_span.numbering_level.level_id) - if level_id > current_level_id: - # Add a new span + item to hold this new level - next_numbering_span = NumberingSpan( - numbering_level=level, - numbering_definition=num_def, - parent=self.current_span, - ) - self.numbering_span_stack.append(next_numbering_span) - next_numbering_item = NumberingItem( - numbering_span=next_numbering_span, - ) - next_numbering_span.children.append(next_numbering_item) - self.current_item.append_child(next_numbering_span) - self.current_span = next_numbering_span - self.current_item = next_numbering_item - self.current_item_index = index - elif level_id < current_level_id or self.is_parent_of_current_span(paragraph): - # we need to "subtract" a level. To do that, find the level - # that we're going back to, which may not even exist - if self.is_parent_of_current_span(paragraph): - previous_span = self.find_previous_numbering_span_by_num_def(paragraph) + if num_def == self.current_span.numbering_definition: + # At this stage we process all the items that are part of the same list. + # All item from the same list have same numbering definition + if level_id > current_level_id: + self.add_new_span_and_item(index, level) + elif level_id < current_level_id: + for item in self.add_new_span_and_item_lower_level(index, level): + yield item + else: + # Here we deal with lists that separate from the parent list meaning + # that have different numbering definition + if not has_parent_list and not is_parent_of_current_span: + self.current_span = self.find_previous_numbering_span_by_num_def(paragraph) + self.current_item = self.current_span.get_last_child() + self.add_new_span_and_item(index, level) + elif has_parent_list and not is_parent_of_current_span: + self.current_span = self.find_parent_numbering_span(paragraph) + self.current_item = self.current_span.get_last_child() + self.add_new_span_and_item(index, level) + self.current_span.is_separate_list = True else: - previous_span = self.find_previous_numbering_span_with_lower_level( - level_id) - - if self.numbering_span_stack: - assert previous_span - self.current_span = previous_span - else: - # If the numbering_span_stack is empty now, it means - # we're handling a mangled level case - # For that scenario, create a new span - self.current_span = NumberingSpan( - numbering_level=level, - numbering_definition=num_def, - parent=self.current_span, - ) - self.numbering_span_stack = [self.current_span] - yield self.current_span - - self.current_item = NumberingItem( - numbering_span=self.current_span, - ) - self.current_item_index = index - self.current_span.append_child(self.current_item) - elif self.inside_parent_span(paragraph): - parent_span = self.find_parent_numbering_span(paragraph) - parent_span_last_item = parent_span.children[-1] - - next_numbering_span = NumberingSpan( - numbering_level=level, - numbering_definition=num_def, - parent=parent_span, - ) - - self.numbering_span_stack.append(next_numbering_span) - next_numbering_item = NumberingItem( - numbering_span=next_numbering_span, - ) - - next_numbering_span.children.append(next_numbering_item) - # add this span to the parent list - # which mean that parent list may be a different list - parent_span_last_item.append_child(next_numbering_span) - - self.current_span = next_numbering_span - self.current_item = next_numbering_item - self.current_item_index = index + self.current_span = self.find_previous_numbering_span_by_num_def(paragraph) + self.add_item_to_span(index) def find_previous_numbering_span_with_lower_level(self, level_id): previous_span = None @@ -510,14 +597,17 @@ def find_previous_numbering_span_by_num_def(self, paragraph): def find_parent_numbering_span(self, paragraph): previous_span = None - num_id = paragraph.numbering_definition.abstract_num_id - parent_id = self.child_parent_num_map.get(num_id, None) - if not parent_id: + num_item = self._get_component_item(paragraph) + + parent_num_item = self.child_parent_num_map.get(num_item['num_id'], None) + if not parent_num_item: return previous_span while self.numbering_span_stack: previous_span = self.numbering_span_stack[-1] - if previous_span.numbering_definition.abstract_num_id == parent_id: + previous_span_item = self._get_component_item(previous_span) + + if previous_span_item == parent_num_item: # we found the parent span of the paragraph item break self.numbering_span_stack.pop() @@ -544,8 +634,8 @@ def handle_paragraph(self, index, paragraph): self.candidate_numbering_items.append((index, paragraph)) return - start_new_span = self.should_start_new_span(paragraph) - start_new_item = self.should_start_new_item(paragraph) + start_new_span = self.should_start_new_span(index, paragraph) + start_new_item = self.should_start_new_item(index, paragraph) if start_new_span: for item in self.handle_start_new_span(index, paragraph): diff --git a/tests/export/test_numbering_span.py b/tests/export/test_numbering_span.py index cabcccd6..3a38912f 100644 --- a/tests/export/test_numbering_span.py +++ b/tests/export/test_numbering_span.py @@ -5,23 +5,31 @@ unicode_literals, ) - +import sys from unittest import TestCase from pydocx.export.numbering_span import NumberingSpanBuilder from pydocx.openxml.wordprocessing import ( Break, Paragraph, + ParagraphProperties, + NumberingProperties, Run, TabChar, Text, + Numbering ) +from pydocx.util.xml import parse_xml_from_string class NumberingSpanTestBase(TestCase): def setUp(self): self.builder = NumberingSpanBuilder() + def _load_from_xml(self, xml): + root = parse_xml_from_string(xml) + return Numbering.load(root) + class CleanParagraphTestCase(NumberingSpanTestBase): def test_empty_paragraph(self): @@ -370,3 +378,339 @@ def test_only_tabs_before_first_text_are_removed(self): self.builder.remove_initial_tab_chars_from_paragraph(paragraph) self.assertEqual(repr(paragraph), repr(expected)) + + +class DetectParentChildMapTestCase(NumberingSpanTestBase): + def assertDictEqual(self, d1, d2, msg=None): + if sys.version_info >= (2, 7): + super(DetectParentChildMapTestCase, self).assertDictEqual(d1, d2, msg=msg) + else: + if d1 != d2: + raise AssertionError("Dicts do not match: %s" % msg) + + def create_container(self): + xml = ''' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ''' + + numbering = self._load_from_xml(xml) + + container = type( + str('Container'), + (object,), + { + 'numbering_definitions_part': type(str('Numbering'), (Numbering,), + {'numbering': numbering}) + } + ) + + return container + + def create_numbering_paragraph(self, num_id, level_id='0', container=True): + paragraph_params = { + 'properties': ParagraphProperties( + numbering_properties=NumberingProperties( + num_id=num_id, + level_id=level_id + ) + ) + } + + if container: + paragraph_params['container'] = self.create_container() + + return Paragraph(**paragraph_params) + + def test_no_components_on_init(self): + builder = NumberingSpanBuilder() + self.assertEqual(builder.child_parent_num_map, {}) + self.assertEqual(builder.parent_child_num_map, {}) + self.assertFalse(builder.detect_parent_child_map_for_items()) + + def test_invalid_input_components(self): + components = [ + Paragraph(), + Paragraph(children=[ + Run(children=[ + TabChar(), + ]) + ]), + Paragraph( + properties=ParagraphProperties() + ), + Paragraph( + properties=ParagraphProperties( + numbering_properties=NumberingProperties() + ) + ), + self.create_numbering_paragraph('1', '0', container=False), + ] + + builder = NumberingSpanBuilder(components) + self.assertEqual(builder.parent_child_num_map, {}) + self.assertEqual(builder.child_parent_num_map, {}) + self.assertFalse(builder.detect_parent_child_map_for_items()) + + def test_valid_input_components_but_no_sublists_found(self): + components = [ + Paragraph(), + self.create_numbering_paragraph('1', '0'), + self.create_numbering_paragraph('1', '0'), + self.create_numbering_paragraph('2', '0'), + self.create_numbering_paragraph('2', '0'), + self.create_numbering_paragraph('3', '0'), + self.create_numbering_paragraph('3', '0'), + ] + + list_start_stop_index = { + '1': {'start': 1, 'stop': 2}, + '2': {'start': 3, 'stop': 4}, + '3': {'start': 5, 'stop': 6}, + } + + builder = NumberingSpanBuilder(components) + self.assertEqual(builder.parent_child_num_map, {}) + self.assertEqual(builder.child_parent_num_map, {}) + self.assertEqual(builder.list_start_stop_index, list_start_stop_index) + self.assertTrue(builder.detect_parent_child_map_for_items()) + + def test_sublist_found(self): + components = [ + Paragraph(), + self.create_numbering_paragraph('1', '0'), + self.create_numbering_paragraph('2', '0'), + self.create_numbering_paragraph('2', '0'), + self.create_numbering_paragraph('1', '0'), + ] + + builder = NumberingSpanBuilder(components) + parent_items = { + ('1', '0'): + [ + {'num_id': '2', 'level': '0'}, + ] + } + child_item = { + '2': {'num_id': '1', 'level': '0'} + } + + list_start_stop_index = { + '1': {'start': 1, 'stop': 4}, + '2': {'start': 2, 'stop': 3}, + } + + self.assertDictEqual(builder.parent_child_num_map, parent_items) + self.assertDictEqual(builder.child_parent_num_map, child_item) + self.assertEqual(builder.list_start_stop_index, list_start_stop_index) + self.assertTrue(builder.detect_parent_child_map_for_items()) + + def test_nested_sublist_found(self): + components = [ + self.create_numbering_paragraph('1', '0'), + self.create_numbering_paragraph('2', '0'), + self.create_numbering_paragraph('3', '0'), + self.create_numbering_paragraph('3', '0'), + self.create_numbering_paragraph('2', '0'), + self.create_numbering_paragraph('1', '0'), + ] + + builder = NumberingSpanBuilder(components) + + parent_items = { + ('1', '0'): + [ + {'num_id': '2', 'level': '0'}, + {'num_id': '3', 'level': '0'}, + ], + ('2', '0'): + [ + {'num_id': '3', 'level': '0'}, + ] + } + child_item = { + '2': {'num_id': '1', 'level': '0'}, + '3': {'num_id': '2', 'level': '0'}, + } + + list_start_stop_index = { + '1': {'start': 0, 'stop': 5}, + '2': {'start': 1, 'stop': 4}, + '3': {'start': 2, 'stop': 3}, + } + + self.assertDictEqual(builder.parent_child_num_map, parent_items) + self.assertDictEqual(builder.child_parent_num_map, child_item) + self.assertDictEqual(builder.list_start_stop_index, list_start_stop_index) + self.assertTrue(builder.detect_parent_child_map_for_items()) + + def test_nested_sublist_not_wrapped_in_parent_item(self): + components = [ + self.create_numbering_paragraph('1', '0'), + self.create_numbering_paragraph('2', '0'), + self.create_numbering_paragraph('3', '0'), + self.create_numbering_paragraph('3', '0'), + self.create_numbering_paragraph('1', '0'), + ] + + builder = NumberingSpanBuilder(components) + + parent_items = { + ('1', '0'): + [ + {'num_id': '2', 'level': '0'}, + {'num_id': '3', 'level': '0'}, + ] + } + child_item = { + '2': {'num_id': '1', 'level': '0'}, + '3': {'num_id': '1', 'level': '0'}, + } + + list_start_stop_index = { + '1': {'start': 0, 'stop': 4}, + '2': {'start': 1, 'stop': 1}, + '3': {'start': 2, 'stop': 3}, + } + + self.assertDictEqual(builder.parent_child_num_map, parent_items) + self.assertDictEqual(builder.child_parent_num_map, child_item) + self.assertDictEqual(builder.list_start_stop_index, list_start_stop_index) + self.assertTrue(builder.detect_parent_child_map_for_items()) + + def test_nested_sublist_parent_with_different_level(self): + components = [ + self.create_numbering_paragraph('1', '0'), + self.create_numbering_paragraph('1', '1'), + self.create_numbering_paragraph('3', '0'), + Paragraph(), + self.create_numbering_paragraph('3', '0'), + self.create_numbering_paragraph('1', '1'), + self.create_numbering_paragraph('2', '0'), + self.create_numbering_paragraph('2', '1'), + self.create_numbering_paragraph('4', '0'), + self.create_numbering_paragraph('4', '0'), + self.create_numbering_paragraph('1', '0'), + Paragraph(), + ] + + builder = NumberingSpanBuilder(components) + + parent_items = { + ('1', '0'): + [ + {'num_id': '3', 'level': '0'}, + {'num_id': '2', 'level': '0'}, + {'num_id': '2', 'level': '1'}, + {'num_id': '4', 'level': '0'}, + ], + ('1', '1'): + [ + {'num_id': '3', 'level': '0'}, + ] + } + child_item = { + '2': {'num_id': '1', 'level': '0'}, + '3': {'num_id': '1', 'level': '1'}, + '4': {'num_id': '1', 'level': '0'}, + } + + list_start_stop_index = { + '1': {'start': 0, 'stop': 10}, + '3': {'start': 2, 'stop': 4}, + '2': {'start': 6, 'stop': 6}, + '4': {'start': 8, 'stop': 9}, + } + + self.assertDictEqual(builder.parent_child_num_map, parent_items) + self.assertDictEqual(builder.child_parent_num_map, child_item) + self.assertDictEqual(builder.list_start_stop_index, list_start_stop_index) + self.assertTrue(builder.detect_parent_child_map_for_items()) + + def test_nested_sublist_multiple_levels(self): + components = [ + self.create_numbering_paragraph('1', '0'), + self.create_numbering_paragraph('1', '1'), + self.create_numbering_paragraph('2', '0'), + self.create_numbering_paragraph('3', '0'), + self.create_numbering_paragraph('4', '0'), + self.create_numbering_paragraph('3', '0'), + self.create_numbering_paragraph('2', '0'), + self.create_numbering_paragraph('1', '1'), + self.create_numbering_paragraph('1', '0'), + ] + + builder = NumberingSpanBuilder(components) + + parent_items = { + ('1', '0'): + [ + {'num_id': '2', 'level': '0'}, + {'num_id': '3', 'level': '0'}, + {'num_id': '4', 'level': '0'}, + ], + ('1', '1'): + [ + {'num_id': '2', 'level': '0'}, + {'num_id': '3', 'level': '0'}, + {'num_id': '4', 'level': '0'}, + ], + ('2', '0'): + [ + {'num_id': '3', 'level': '0'}, + {'num_id': '4', 'level': '0'}, + ], + ('3', '0'): + [ + {'num_id': '4', 'level': '0'}, + ] + } + child_item = { + '2': {'num_id': '1', 'level': '1'}, + '3': {'num_id': '2', 'level': '0'}, + '4': {'num_id': '3', 'level': '0'}, + } + + self.assertDictEqual(builder.parent_child_num_map, parent_items) + self.assertDictEqual(builder.child_parent_num_map, child_item) + self.assertTrue(builder.detect_parent_child_map_for_items()) From 3ae50cb916ddd0f13279c4f513ff20969b932bce Mon Sep 17 00:00:00 2001 From: Chirica Gheorghe Date: Fri, 17 Feb 2017 17:01:22 +0200 Subject: [PATCH 23/23] Fixed margins for separated nested lists --- pydocx/export/base.py | 3 +- pydocx/export/html.py | 104 ++-- pydocx/export/numbering_span.py | 52 +- pydocx/openxml/wordprocessing/paragraph.py | 25 +- tests/export/html/test_numbering.py | 504 +++++++++++++++++- tests/export/test_numbering_span.py | 64 ++- tests/fixtures/lists_with_styles.html | 8 +- tests/fixtures/nested_lists.html | 12 +- .../nested_lists_different_num_ids.html | 16 +- 9 files changed, 669 insertions(+), 119 deletions(-) diff --git a/pydocx/export/base.py b/pydocx/export/base.py index 2360a702..67b002bc 100644 --- a/pydocx/export/base.py +++ b/pydocx/export/base.py @@ -32,7 +32,6 @@ def __init__(self, path): self.captured_runs = None self.complex_field_runs = [] - self.numbering_level_listing_track = {} self.node_type_to_export_func_map = { wordprocessing.Document: self.export_document, @@ -287,7 +286,7 @@ def yield_numbering_spans(self, items): for item in items: yield item return - builder = self.numbering_span_builder_class(items) + builder = self.numbering_span_builder_class(items, process_components=True) numbering_spans = builder.get_numbering_spans() for item in numbering_spans: yield item diff --git a/pydocx/export/html.py b/pydocx/export/html.py index e03564df..18e3ea53 100644 --- a/pydocx/export/html.py +++ b/pydocx/export/html.py @@ -360,28 +360,6 @@ def export_paragraph_property_indentation(self, paragraph, results): return results - def get_previous_level_paragraph(self, num_id, level_id): - level_id = int(level_id) - - while True: - if level_id == 0: - prev_level_id = level_id - else: - prev_level_id = level_id - 1 - - prev_level_paragraphs = self.numbering_level_listing_track[num_id][prev_level_id] - if prev_level_paragraphs: - return prev_level_paragraphs[-1] - - if prev_level_id == 0 and not prev_level_paragraphs: - # This is an edge case with older version of word when it may contain a sublist - # into a separate num_id. - break - - level_id -= 1 - - return None - def export_listing_paragraph_property_indentation( self, paragraph, @@ -393,13 +371,11 @@ def export_listing_paragraph_property_indentation( if not level_properties or not paragraph.has_numbering_properties: return style - default_level_indentation = paragraph.get_numbering_default_level_indentation() + level_indentation_step = \ + paragraph.numbering_definition.get_indentation_between_levels() paragraph_properties = paragraph.properties - level_id = int(paragraph_properties.numbering_properties.level_id) - num_id = paragraph_properties.numbering_properties.num_id - level_ind_left = level_properties.to_int('indentation_left', default=0) level_ind_hanging = level_properties.to_int('indentation_hanging', default=0) @@ -414,49 +390,50 @@ def export_listing_paragraph_property_indentation( if not left and not hanging: return style - if num_id not in self.numbering_level_listing_track: - # By default there are only 9 numbering levels in docx(0 indexed) - self.numbering_level_listing_track[num_id] = [[] for _ in range(10)] - if paragraph not in self.numbering_level_listing_track[num_id][level_id]: - self.numbering_level_listing_track[num_id][level_id].append(paragraph) - - # By default left contains hanging as well, so we remove it - left -= hanging - - if level_id == 0: - # Because html ul/ol/li elements have their default indentations - # We remove the default word one as well, - # This way we will have as near as possible migration to html - left -= (default_level_indentation['left'] - level_ind_hanging) - - # First line are added to left margins - if paragraph_ind_first_line: - left += paragraph_ind_first_line - - if level_id > 0: - # For nested levels we need to add indentation based on parent level - prev_paragraph = self.get_previous_level_paragraph(num_id, level_id) - if prev_paragraph: - prev_left_level_indentation = prev_paragraph.get_numbering_level( - ).paragraph_properties.to_int('indentation_left') - left -= (prev_left_level_indentation - level_ind_hanging) - else: - # There are edge cases when we have a level > 0 for specific num_id but no - # actual level=0 for this num_id. in such cases we just do the default - # indentation - left -= level_ind_hanging + # All the bellow left margin calculation is done because html ul/ol/li elements have + # their default indentations and we need to make sure that we migrate as near as + # possible solution to html. + margin_left = left + + # Because hanging can be set independently, we remove it from left margin and will + # be added as text-indent later on + margin_left -= hanging + + # Take into account that current span can have custom left margin + if level_indentation_step > level_ind_hanging: + margin_left -= (level_indentation_step - level_ind_hanging) + else: + margin_left -= level_indentation_step - # Because lists add there own nested level indentation we subtract it here - # and the remaining part will be the actual needed indentation - left -= default_level_indentation['level_indentation_step'] + # First line are added to left margins + margin_left += paragraph_ind_first_line + + if isinstance(paragraph.parent, NumberingItem): + try: + # In case of nested lists elements, we need to adjust left margin + # based on the parent item + parent_paragraph = paragraph.parent.numbering_span.parent.get_first_child() + + parent_ind_left = parent_paragraph.get_indentation('indentation_left') + parent_ind_hanging = parent_paragraph.get_indentation('indentation_hanging') + parent_lvl_ind_hanging = parent_paragraph.get_indentation( + 'indentation_hanging') + + margin_left -= (parent_ind_left - parent_ind_hanging) + margin_left -= parent_lvl_ind_hanging + # To mimic the word way of setting first line, we need to move back(left) all + # elements by first_line value + margin_left -= parent_paragraph.get_indentation('indentation_first_line') + except AttributeError: + pass # Here as well, we remove the default hanging which word adds # because

tag will provide it's own hanging -= level_ind_hanging - if left: - left = convert_twips_to_ems(left) - style['margin-left'] = '{0:.2f}em'.format(left) + if margin_left: + margin_left = convert_twips_to_ems(margin_left) + style['margin-left'] = '{0:.2f}em'.format(margin_left) # we don't allow negative hanging if hanging < 0: @@ -466,7 +443,6 @@ def export_listing_paragraph_property_indentation( if hanging is not None: # Now, here we add the hanging as text-indent for the paragraph hanging = convert_twips_to_ems(hanging) - # TODO text-indent doesn't work with inline elements like span style['text-indent'] = '{0:.2f}em'.format(hanging) return style diff --git a/pydocx/export/numbering_span.py b/pydocx/export/numbering_span.py index dade5985..809ff672 100644 --- a/pydocx/export/numbering_span.py +++ b/pydocx/export/numbering_span.py @@ -138,6 +138,7 @@ class NumberingSpan(object): def __init__(self, numbering_level, numbering_definition, parent): self.children = [] + self._nested_level = 0 # Mark a separate nested list self.is_separate_list = False self.numbering_level = numbering_level @@ -166,6 +167,17 @@ def get_last_child(self): def get_numbering_level(self): return self.numbering_level + @property + def nested_level(self): + return self._nested_level + + def inc_nested_level(self): + nested_level = 0 + if isinstance(self.parent, (NumberingSpan, NumberingItem)): + nested_level = self.parent.nested_level + + self._nested_level = nested_level + 1 + class NumberingItem(object): ''' @@ -184,6 +196,16 @@ def append_child(self, child): child.parent = self self.children.append(child) + @property + def nested_level(self): + return self.parent.nested_level + + def get_first_child(self): + if self.children: + return self.children[0] + + return None + class BaseNumberingSpanBuilder(object): ''' @@ -199,7 +221,7 @@ class BaseNumberingSpanBuilder(object): accomplished using the NumberingSpan and NumberingItem classes. ''' - def __init__(self, components=None): + def __init__(self, components=None, process_components=False): if not components: components = [] self.components = components @@ -212,7 +234,8 @@ def __init__(self, components=None): self.parent_child_num_map = {} self.list_start_stop_index = {} - self.detect_parent_child_map_for_items() + if process_components: + self.detect_parent_child_map_for_items() @memoized def get_numbering_level(self, paragraph): @@ -270,7 +293,6 @@ def detect_parent_child_map_for_items(self): parent_child_map = {} child_parent_map = {} list_start_stop_index = {} - # we are interested only in components that are part of the listing components = [component for component in self.components if hasattr(component, 'properties') @@ -280,14 +302,17 @@ def detect_parent_child_map_for_items(self): if not components: return False - components_reversed = list(reversed(components)) - - for i, component in enumerate(components): + components_reversed_list = list(reversed(components)) + for i, component in enumerate(components[:-1]): parent_item = self._get_component_item(component) - nums = [] outer_item_found = False - for j, next_component in enumerate(components_reversed[:-1]): + if i > 0: + components_reversed = components_reversed_list[:-i] + else: + components_reversed = components_reversed_list + + for j, next_component in enumerate(components_reversed): next_item = self._get_component_item(next_component) if parent_item == next_item: outer_item_found = True @@ -300,12 +325,17 @@ def detect_parent_child_map_for_items(self): 'stop': self.components.index(next_component) } break - if outer_item_found: for _component in components[i + 1:-j - 1]: child_item = self._get_component_item(_component) + # We need to process only items that have different num_id + # which mean are part of the different list if child_item['num_id'] != parent_item['num_id']: - nums.append(child_item) + # Check if child is not already a parent + child_item_children = parent_child_map.get( + (child_item['num_id'], child_item['level']), []) + if parent_item not in child_item_children: + nums.append(child_item) if nums: # parent_key = parent_item['num_id'] parent_key = (parent_item['num_id'], parent_item['level']) @@ -478,6 +508,8 @@ def add_new_span_and_item(self, index, level, parent_span=None): self.current_item = next_numbering_item self.current_item_index = index + self.current_span.inc_nested_level() + def add_new_span_and_item_lower_level(self, index, level, previous_span=None): num_def = level.parent diff --git a/pydocx/openxml/wordprocessing/paragraph.py b/pydocx/openxml/wordprocessing/paragraph.py index 511ff379..bdb6b387 100644 --- a/pydocx/openxml/wordprocessing/paragraph.py +++ b/pydocx/openxml/wordprocessing/paragraph.py @@ -190,18 +190,19 @@ def has_numbering_properties(self): def has_numbering_definition(self): return bool(self.numbering_definition) - @memoized - def get_numbering_default_level_indentation(self, first_level_left=720): - """Given an input listing paragraph we calculate what is the default left - indentation on this level. Based on this we can determine whether we should - add margins to html

or leave the default added by tag.""" - - # by default a list is started with 'first_level_left' indentation. - - level_id = int(self.properties.numbering_properties.level_id) + def get_indentation(self, indentation, only_level_ind=False): + ''' + Get specific indentation of the current paragraph. If indentation is + not present on the paragraph level, get it from the numbering definition. + ''' - default_left_inc = self.numbering_definition.get_indentation_between_levels() + ind = None - left = first_level_left * (1 if not level_id else level_id) + if self.properties: + if not only_level_ind: + ind = self.properties.to_int(indentation) + if ind is None: + level = self.get_numbering_level() + ind = level.paragraph_properties.to_int(indentation, default=0) - return {'left': left, 'level_indentation_step': default_left_inc} + return ind diff --git a/tests/export/html/test_numbering.py b/tests/export/html/test_numbering.py index 8386499c..b41dc9de 100644 --- a/tests/export/html/test_numbering.py +++ b/tests/export/html/test_numbering.py @@ -33,6 +33,19 @@ class NumberingTestBase(object):

''' + simple_list_item_with_indentation = ''' +

+ + + + + + + + {content} +

+ ''' + simple_list_definition = ''' @@ -1114,6 +1127,493 @@ def test_root_level_numfmt_None_with_sublist(self): self.assert_document_generates_html(document, expected_html) +class NumberingIndentationTestCase(NumberingTestBase, DocumentGeneratorTestCase): + def test_no_numbering_definition_defined(self): + document_xml = ''' + {aaa} + {bbb} + {ccc} + '''.format( + aaa=self.simple_list_item.format( + content='AAA', + num_id=1, + ilvl=0, + ), + bbb=self.simple_list_item.format( + content='BBB', + num_id=1, + ilvl=1, + ), + ccc=self.simple_list_item.format( + content='CCC', + num_id=1, + ilvl=2, + ), + ) + + document = WordprocessingDocumentFactory() + document.add(MainDocumentPart, document_xml) + + expected_html = ''' +

AAA

BBB

CCC

+ ''' + self.assert_document_generates_html(document, expected_html) + + def test_default_indentation(self): + document_xml = ''' + {aaa} + {bbb} + {ccc} + '''.format( + aaa=self.simple_list_item.format( + content='AAA', + num_id=1, + ilvl=0, + ), + bbb=self.simple_list_item.format( + content='BBB', + num_id=1, + ilvl=1, + ), + ccc=self.simple_list_item.format( + content='CCC', + num_id=1, + ilvl=2, + ), + ) + + numbering_xml = ''' + + + + + + + + + + + + + + + + + + + + + + + + ''' + + document = WordprocessingDocumentFactory() + document.add(NumberingDefinitionsPart, numbering_xml) + document.add(MainDocumentPart, document_xml) + + expected_html = ''' +

AAA +
1. BBB +
  1. CCC
  +
+

+ ''' + self.assert_document_generates_html(document, expected_html) + + def test_custom_indentation(self): + document_xml = ''' + {aaa} + {bbb} + {ccc} + '''.format( + aaa=self.simple_list_item_with_indentation.format( + content='AAA', + num_id=1, + ilvl=0, + ind='left="1440" hanging="360"' + ), + bbb=self.simple_list_item_with_indentation.format( + content='BBB', + num_id=1, + ilvl=1, + ind='left="2880" hanging="360"' + ), + ccc=self.simple_list_item_with_indentation.format( + content='CCC', + num_id=1, + ilvl=2, + ind='left="4320" hanging="360"' + ), + ) + + numbering_xml = ''' + + + + + + + + + + + + + + + + + + + + + + + + ''' + + document = WordprocessingDocumentFactory() + document.add(NumberingDefinitionsPart, numbering_xml) + document.add(MainDocumentPart, document_xml) + + expected_html = ''' +

AAA +
1. BBB +
  1. CCC
  +
+

+ ''' + self.assert_document_generates_html(document, expected_html) + + def test_custom_hanging_indentation(self): + document_xml = ''' + {aaa} + {bbb} + {ccc} + '''.format( + aaa=self.simple_list_item_with_indentation.format( + content='AAA', + num_id=1, + ilvl=0, + ind='left="720" hanging="500"' + ), + bbb=self.simple_list_item_with_indentation.format( + content='BBB', + num_id=1, + ilvl=1, + ind='left="1440" hanging="700"' + ), + ccc=self.simple_list_item_with_indentation.format( + content='CCC', + num_id=1, + ilvl=2, + ind='left="2160" hanging="800"' + ), + ) + + numbering_xml = ''' + + + + + + + + + + + + + + + + + + + + + + + + ''' + + document = WordprocessingDocumentFactory() + document.add(NumberingDefinitionsPart, numbering_xml) + document.add(MainDocumentPart, document_xml) + + expected_html = ''' +

+ AAA +
1. + BBB +
  1. + CCC + +
  +
+

+ ''' + self.assert_document_generates_html(document, expected_html) + + def test_custom_first_line_indentation(self): + document_xml = ''' + {aaa} + {bbb} + {ccc} + '''.format( + aaa=self.simple_list_item_with_indentation.format( + content='AAA', + num_id=1, + ilvl=0, + ind='firstLine="360"' + ), + bbb=self.simple_list_item_with_indentation.format( + content='BBB', + num_id=1, + ilvl=1, + ind='firstLine="360"' + ), + ccc=self.simple_list_item_with_indentation.format( + content='CCC', + num_id=1, + ilvl=2, + ind='firstLine="360"' + ), + ) + + numbering_xml = ''' + + + + + + + + + + + + + + + + + + + + + + + + ''' + + document = WordprocessingDocumentFactory() + document.add(NumberingDefinitionsPart, numbering_xml) + document.add(MainDocumentPart, document_xml) + + expected_html = ''' +

AAA +
1. BBB +
  1. CCC
  +
+

+ ''' + self.assert_document_generates_html(document, expected_html) + + def test_nested_separated_lists(self): + document_xml = ''' + {aaa} + {bbb} + {ccc} + {ddd} + '''.format( + aaa=self.simple_list_item.format( + content='AAA', + num_id=1, + ilvl=0 + ), + bbb=self.simple_list_item.format( + content='BBB', + num_id=1, + ilvl=1, + ), + ccc=self.simple_list_item.format( + content='CCC', + num_id=2, + ilvl=0, + ), + ddd=self.simple_list_item.format( + content='DDD', + num_id=1, + ilvl=1, + ), + ) + + numbering_xml = ''' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ''' + + document = WordprocessingDocumentFactory() + document.add(NumberingDefinitionsPart, numbering_xml) + document.add(MainDocumentPart, document_xml) + + expected_html = ''' +

+ AAA +
1. + BBB +
  1. CCC
  +
2. DDD
+

+ ''' + self.assert_document_generates_html(document, expected_html) + + def test_nested_separated_lists_different_level(self): + document_xml = ''' + {aaa} + {bbb} + {ccc} + {ddd} + '''.format( + aaa=self.simple_list_item.format( + content='AAA', + num_id=1, + ilvl=0 + ), + bbb=self.simple_list_item.format( + content='BBB', + num_id=2, + ilvl=1, + ), + ccc=self.simple_list_item.format( + content='CCC', + num_id=2, + ilvl=1, + ), + ddd=self.simple_list_item.format( + content='DDD', + num_id=1, + ilvl=0, + ), + ) + + numbering_xml = ''' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ''' + + document = WordprocessingDocumentFactory() + document.add(NumberingDefinitionsPart, numbering_xml) + document.add(MainDocumentPart, document_xml) + + expected_html = ''' +

+ AAA +
1. BBB
2. CCC
+
DDD

+ ''' + self.assert_document_generates_html(document, expected_html) + + class FakedNumberingManyItemsTestCase(NumberingTestBase, DocumentGeneratorTestCase): def assert_html(self, list_type, digit_generator): paragraphs = [] @@ -1386,7 +1886,7 @@ def test_real_nested_list_continuation_fake_nested_list_using_indentation(self): expected_html = '''

AAA +
AAA
1. BBB
2. CCC