diff --git a/AUTHORS b/AUTHORS
index d1b33f05..c4b46c16 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -3,3 +3,4 @@ Jason Ward
Kyle Gibson
Chirica Gheorghe
Anirudha Bose
+Tarashish Mishra
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 41bb117b..120f60d2 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,3 +1,11 @@
+**dev**
+
+- Internal links and anchors are now retained. Thanks, sunu! `#222 `_
+
+**0.9.10**
+
+- No longer error when processing margin positions with decimal points.
+
**0.9.9**
- Rect elements now correctly handle image data
diff --git a/docs/index.rst b/docs/index.rst
index c304e103..05a0f761 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -11,4 +11,5 @@ PyDocX
export_mixins
enumerated_list_detection
development
+ plugins
release_notes
diff --git a/docs/plugins.rst b/docs/plugins.rst
new file mode 100644
index 00000000..40069e74
--- /dev/null
+++ b/docs/plugins.rst
@@ -0,0 +1,36 @@
+#######
+Plugins
+#######
+
+You may find yourself needing
+a feature in PyDocX that doesn't exist
+in the core library.
+
+If it's something that should exist, the
+PyDocX project is always open to new
+contributions. Details of how to contribute
+can be found in :doc:`/development`.
+
+For things that don't fit in the core
+library, it's easy to build a plugin
+based on the :doc:`Extending PyDocX </extending>` and
+:doc:`Export Mixins </export_mixins>` sections.
+
+If you do build a plugin, edit this
+documentation and add it below so that
+other developers can find it.
+
+-----------------
+Available Plugins
+-----------------
+
+.. list-table::
+ :widths: 20 80
+ :header-rows: 1
+
+ * - Plugin
+ - Description
+ * - `pydocx-resize-images `_
+ - Resizes large images to the dimensions they are in the docx file
+ * - `pydocx-s3-images `_
+ - Uploads images to S3 instead of returning Data URIs
diff --git a/pydocx/__init__.py b/pydocx/__init__.py
index 4e5ebc69..d45f70ba 100644
--- a/pydocx/__init__.py
+++ b/pydocx/__init__.py
@@ -6,4 +6,4 @@
'PyDocX',
]
-__version__ = '0.9.9'
+__version__ = '0.9.10'
diff --git a/pydocx/export/base.py b/pydocx/export/base.py
index 9ca2afef..67b002bc 100644
--- a/pydocx/export/base.py
+++ b/pydocx/export/base.py
@@ -286,7 +286,7 @@ def yield_numbering_spans(self, items):
for item in items:
yield item
return
- builder = self.numbering_span_builder_class(items)
+ builder = self.numbering_span_builder_class(items, process_components=True)
numbering_spans = builder.get_numbering_spans()
for item in numbering_spans:
yield item
diff --git a/pydocx/export/html.py b/pydocx/export/html.py
index 40498a89..18e3ea53 100644
--- a/pydocx/export/html.py
+++ b/pydocx/export/html.py
@@ -17,7 +17,7 @@
POINTS_PER_EM,
PYDOCX_STYLES,
TWIPS_PER_POINT,
- EMUS_PER_PIXEL,
+ EMUS_PER_PIXEL
)
from pydocx.export.base import PyDocXExporter
from pydocx.export.numbering_span import NumberingItem
@@ -96,12 +96,12 @@ class HtmlTag(object):
closed_tag_format = '{tag}>'
def __init__(
- self,
- tag,
- allow_self_closing=False,
- closed=False,
- allow_whitespace=False,
- **attrs
+ self,
+ tag,
+ allow_self_closing=False,
+ closed=False,
+ allow_whitespace=False,
+ **attrs
):
self.tag = tag
self.allow_self_closing = allow_self_closing
@@ -270,6 +270,8 @@ def get_heading_tag(self, paragraph):
heading_style.name.lower(),
self.default_heading_level,
)
+ if paragraph.bookmark_name:
+ return HtmlTag(tag, id=paragraph.bookmark_name)
return HtmlTag(tag)
def export_paragraph(self, paragraph):
@@ -309,46 +311,45 @@ def export_paragraph_property_justification(self, paragraph, results):
def export_paragraph_property_indentation(self, paragraph, results):
# TODO these classes should be applied on the paragraph, and not as
# inline styles
+
properties = paragraph.effective_properties
style = {}
- if properties.indentation_right:
- # TODO would be nice if this integer conversion was handled
- # implicitly by the model somehow
- try:
- right = int(properties.indentation_right)
- except ValueError:
- right = None
+ # Numbering properties can define a text indentation on a paragraph
+ if properties.numbering_properties:
+ indentation_left = None
+ indentation_first_line = None
- if right:
- right = convert_twips_to_ems(right)
- style['margin-right'] = '{0:.2f}em'.format(right)
+ paragraph_num_level = paragraph.get_numbering_level()
- if properties.indentation_left:
- # TODO would be nice if this integer conversion was handled
- # implicitly by the model somehow
- try:
- left = int(properties.indentation_left)
- except ValueError:
- left = None
+ if paragraph_num_level:
+ listing_style = self.export_listing_paragraph_property_indentation(
+ paragraph,
+ paragraph_num_level.paragraph_properties,
+ include_text_indent=True
+ )
+ if 'text-indent' in listing_style and listing_style['text-indent'] != '0.00em':
+ style['text-indent'] = listing_style['text-indent']
+ style['display'] = 'inline-block'
+ else:
+ indentation_left = properties.to_int('indentation_left')
+ indentation_first_line = properties.to_int('indentation_first_line')
- if left:
- left = convert_twips_to_ems(left)
- style['margin-left'] = '{0:.2f}em'.format(left)
+ indentation_right = properties.to_int('indentation_right')
- if properties.indentation_first_line:
- # TODO would be nice if this integer conversion was handled
- # implicitly by the model somehow
- try:
- first_line = int(properties.indentation_first_line)
- except ValueError:
- first_line = None
+ if indentation_right:
+ right = convert_twips_to_ems(indentation_right)
+ style['margin-right'] = '{0:.2f}em'.format(right)
+
+ if indentation_left:
+ left = convert_twips_to_ems(indentation_left)
+ style['margin-left'] = '{0:.2f}em'.format(left)
- if first_line:
- first_line = convert_twips_to_ems(first_line)
- # TODO text-indent doesn't work with inline elements like span
- style['text-indent'] = '{0:.2f}em'.format(first_line)
+ if indentation_first_line:
+ first_line = convert_twips_to_ems(indentation_first_line)
+ style['text-indent'] = '{0:.2f}em'.format(first_line)
+ style['display'] = 'inline-block'
if style:
attrs = {
@@ -359,6 +360,93 @@ def export_paragraph_property_indentation(self, paragraph, results):
return results
+ def export_listing_paragraph_property_indentation(
+ self,
+ paragraph,
+ level_properties,
+ include_text_indent=False
+ ):
+ """
+ Compute the CSS indentation for a paragraph that is part of a list.
+
+ Indentation set directly on the paragraph wins over the indentation of
+ its numbering level. The left margin is then reduced by what the html
+ list elements already provide and, for nested lists, by the indentation
+ of the parent list item.
+
+ :param paragraph: list paragraph being exported
+ :param level_properties: paragraph properties of its numbering level
+ :param include_text_indent: when true, also emit ``text-indent`` for the
+ hanging indentation
+ :return: dict of CSS properties (``margin-left`` and optionally
+ ``text-indent``); empty when there is nothing to apply
+ """
+ style = {}
+
+ if not level_properties or not paragraph.has_numbering_properties:
+ return style
+
+ level_indentation_step = \
+ paragraph.numbering_definition.get_indentation_between_levels()
+
+ paragraph_properties = paragraph.properties
+
+ level_ind_left = level_properties.to_int('indentation_left', default=0)
+ level_ind_hanging = level_properties.to_int('indentation_hanging', default=0)
+
+ paragraph_ind_left = paragraph_properties.to_int('indentation_left', default=0)
+ paragraph_ind_hanging = paragraph_properties.to_int('indentation_hanging', default=0)
+ paragraph_ind_first_line = paragraph_properties.to_int('indentation_first_line',
+ default=0)
+
+ # Values set on the paragraph itself take precedence over the level's
+ left = paragraph_ind_left or level_ind_left
+ hanging = paragraph_ind_hanging or level_ind_hanging
+ # At this point we have no info about indentation, so we keep the default one
+ if not left and not hanging:
+ return style
+
+ # All of the left margin calculation below is needed because html ul/ol/li
+ # elements have their own default indentation, and we want the html output
+ # to be as close as possible to the original layout.
+ margin_left = left
+
+ # Because hanging can be set independently, we remove it from the left margin;
+ # it is added back as text-indent later on
+ margin_left -= hanging
+
+ # Take into account that the current span can have a custom left margin
+ if level_indentation_step > level_ind_hanging:
+ margin_left -= (level_indentation_step - level_ind_hanging)
+ else:
+ margin_left -= level_indentation_step
+
+ # The first line indentation is added to the left margin
+ margin_left += paragraph_ind_first_line
+
+ if isinstance(paragraph.parent, NumberingItem):
+ try:
+ # In case of nested list elements, we need to adjust the left margin
+ # based on the parent item
+ parent_paragraph = paragraph.parent.numbering_span.parent.get_first_child()
+
+ parent_ind_left = parent_paragraph.get_indentation('indentation_left')
+ parent_ind_hanging = parent_paragraph.get_indentation('indentation_hanging')
+ # Level-only value: the paragraph-level hanging is already covered by
+ # parent_ind_hanging, so asking for the same value again would make
+ # the two subtractions below cancel each other out.
+ parent_lvl_ind_hanging = parent_paragraph.get_indentation(
+ 'indentation_hanging', only_level_ind=True)
+
+ margin_left -= (parent_ind_left - parent_ind_hanging)
+ margin_left -= parent_lvl_ind_hanging
+ # To mimic the way word applies the first line, we need to move all
+ # elements back (left) by the first_line value
+ margin_left -= parent_paragraph.get_indentation('indentation_first_line')
+ except AttributeError:
+ # Best effort: the parent chain may lack a span, item or paragraph
+ pass
+
+ # Here as well, we remove the default hanging which word adds
+ # because the li tag will provide its own
+ hanging -= level_ind_hanging
+
+ if margin_left:
+ margin_left = convert_twips_to_ems(margin_left)
+ style['margin-left'] = '{0:.2f}em'.format(margin_left)
+
+ # we don't allow negative hanging
+ if hanging < 0:
+ hanging = 0
+
+ if include_text_indent:
+ if hanging is not None:
+ # Now, here we add the hanging as text-indent for the paragraph
+ hanging = convert_twips_to_ems(hanging)
+ style['text-indent'] = '{0:.2f}em'.format(hanging)
+
+ return style
+
def get_run_styles_to_apply(self, run):
parent_paragraph = run.get_first_ancestor(wordprocessing.Paragraph)
if parent_paragraph and parent_paragraph.heading_style:
@@ -507,7 +595,10 @@ def get_hyperlink_tag(self, target_uri):
def export_hyperlink(self, hyperlink):
results = super(PyDocXHTMLExporter, self).export_hyperlink(hyperlink)
- tag = self.get_hyperlink_tag(target_uri=hyperlink.target_uri)
+ if not hyperlink.target_uri and hyperlink.anchor:
+ tag = self.get_hyperlink_tag(target_uri='#' + hyperlink.anchor)
+ else:
+ tag = self.get_hyperlink_tag(target_uri=hyperlink.target_uri)
if tag:
results = tag.apply(results, allow_empty=False)
@@ -732,7 +823,25 @@ def export_numbering_item(self, numbering_item):
numbering_item.children,
self.export_node,
)
- tag = HtmlTag('li')
+
+ style = None
+
+ if numbering_item.children:
+ level_properties = numbering_item.numbering_span.\
+ numbering_level.paragraph_properties
+ # get the first paragraph properties which will contain information
+ # on how to properly indent listing item
+ paragraph = numbering_item.children[0]
+
+ style = self.export_listing_paragraph_property_indentation(paragraph,
+ level_properties)
+
+ attrs = {}
+
+ if style:
+ attrs['style'] = convert_dictionary_to_style_fragment(style)
+
+ tag = HtmlTag('li', **attrs)
return tag.apply(results)
def export_field_hyperlink(self, simple_field, field_args):
diff --git a/pydocx/export/numbering_span.py b/pydocx/export/numbering_span.py
index f84dbe3e..809ff672 100644
--- a/pydocx/export/numbering_span.py
+++ b/pydocx/export/numbering_span.py
@@ -10,16 +10,14 @@
import string
from pydocx.openxml import wordprocessing
-from pydocx.util.memoize import memoized
-
from pydocx.openxml.wordprocessing.run import Run
from pydocx.openxml.wordprocessing.tab_char import TabChar
from pydocx.openxml.wordprocessing.text import Text
+from pydocx.util.memoize import memoized
# Defined in 17.15.1.25
DEFAULT_AUTOMATIC_TAB_STOP_INTERVAL = 720 # twips
-
roman_numeral_map = tuple(zip(
(1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1),
('M', 'CM', 'D', 'CD', 'C', 'XC', 'L', 'XL', 'X', 'IX', 'V', 'IV', 'I')
@@ -140,6 +138,9 @@ class NumberingSpan(object):
def __init__(self, numbering_level, numbering_definition, parent):
self.children = []
+ self._nested_level = 0
+ # Mark a separate nested list
+ self.is_separate_list = False
self.numbering_level = numbering_level
self.numbering_definition = numbering_definition
self.parent = parent
@@ -156,6 +157,27 @@ def get_first_child_of_first_item(self):
return
return first_item.children[0]
+ def get_last_child(self):
+ """Return the last entry of ``self.children``, or None when there is none."""
+ if not self.children:
+ return
+ last_item = self.children[-1]
+
+ return last_item
+
+ def get_numbering_level(self):
+ """
+ Return the numbering level of this span.
+
+ Paragraphs expose a method with the same name, which lets the span
+ builder's ``_get_component_item`` accept spans as well as paragraphs.
+ """
+ return self.numbering_level
+
+ @property
+ def nested_level(self):
+ """Nesting depth of this span; 0 until ``inc_nested_level`` is called."""
+ return self._nested_level
+
+ def inc_nested_level(self):
+ """
+ Set the nested level of this span to one more than its parent's.
+
+ A parent that is neither a NumberingSpan nor a NumberingItem counts as
+ level 0, so the span ends up at level 1.
+ """
+ nested_level = 0
+ if isinstance(self.parent, (NumberingSpan, NumberingItem)):
+ nested_level = self.parent.nested_level
+
+ self._nested_level = nested_level + 1
+
class NumberingItem(object):
'''
@@ -174,6 +196,16 @@ def append_child(self, child):
child.parent = self
self.children.append(child)
+ @property
+ def nested_level(self):
+ """
+ Nested level of this item, taken from ``self.parent``.
+
+ NOTE(review): presumably the parent is the NumberingSpan that owns this
+ item -- confirm where ``parent`` is assigned.
+ """
+ return self.parent.nested_level
+
+ def get_first_child(self):
+ """Return the first entry of ``self.children``, or None when there is none."""
+ if self.children:
+ return self.children[0]
+
+ return None
+
class BaseNumberingSpanBuilder(object):
'''
@@ -189,7 +221,7 @@ class BaseNumberingSpanBuilder(object):
accomplished using the NumberingSpan and NumberingItem classes.
'''
- def __init__(self, components=None):
+ def __init__(self, components=None, process_components=False):
if not components:
components = []
self.components = components
@@ -198,6 +230,12 @@ def __init__(self, components=None):
self.current_item = None
self.current_item_index = 0
self.candidate_numbering_items = []
+ self.child_parent_num_map = {}
+ self.parent_child_num_map = {}
+ self.list_start_stop_index = {}
+
+ if process_components:
+ self.detect_parent_child_map_for_items()
@memoized
def get_numbering_level(self, paragraph):
@@ -206,6 +244,147 @@ def get_numbering_level(self, paragraph):
return None
return level
+ def _get_component_item(self, component, to_tuple=False):
+ """
+ Describe the list a component belongs to by its abstract numbering id
+ and its level id.
+
+ :param component: object exposing ``numbering_definition`` and
+ ``get_numbering_level()``; callers in this class pass paragraphs and
+ numbering spans
+ :param to_tuple: return ``(num_id, level)`` instead of a dict
+ :return: ``{'num_id': ..., 'level': ...}``, or the equivalent tuple
+ """
+ item = {
+ 'num_id': component.numbering_definition.abstract_num_id,
+ 'level': component.get_numbering_level().level_id
+ }
+
+ if to_tuple:
+ item = (item['num_id'], item['level'])
+
+ return item
+
+ def detect_parent_child_map_for_items(self):
+ """
+ Detect lists that are nested inside an item of a different list.
+
+ An item can contain a span that uses a different numbering definition
+ than the list the item belongs to.
+ Example listing:
+ 1. A
+ 2. B
+ Separate
+ * B1
+ * B2
+ 3. C
+
+ In the above example the items B1 and B2 form a separate span with a
+ different numbering definition. These cases need to be detected so that
+ the numbering of the outer list is continued properly (in this case
+ with '3. C').
+
+ This is parsed as follows:
+ let's say that list: A, B, C has abstract_num_id = 1, level = 0
+ and list: B1, B2 has abstract_num_id = 4, level = 0
+
+ As output two dicts are constructed:
+ child_parent_num_map = {
+ "4": {"num_id": '1', "level": '0'}
+ }
+
+ parent_child_num_map = {
+ ("1", "0"): [{"num_id": '4', "level": '0'}]
+ }
+
+ So, when a paragraph item is processed, it is known from the start
+ whether its list has a parent list or not.
+
+ The dicts are stored on ``self.child_parent_num_map`` and
+ ``self.parent_child_num_map``; the positions recorded while scanning
+ are stored on ``self.list_start_stop_index``.
+
+ :return: False when there are no list components to process,
+ True otherwise
+ """
+
+ if not self.components:
+ return False
+
+ parent_child_map = {}
+ child_parent_map = {}
+ list_start_stop_index = {}
+ # we are interested only in components that are part of the listing
+ components = [component for component in self.components if
+ hasattr(component, 'properties')
+ and hasattr(component.properties, 'numbering_properties')
+ and component.numbering_definition
+ and component.get_numbering_level()]
+ if not components:
+ return False
+
+ components_reversed_list = list(reversed(components))
+ for i, component in enumerate(components[:-1]):
+ parent_item = self._get_component_item(component)
+ nums = []
+ outer_item_found = False
+ # Drop the components that come before `component`: they are the last
+ # i entries of the reversed list. A plain [:-0] slice would be empty,
+ # hence the separate branch for i == 0.
+ if i > 0:
+ components_reversed = components_reversed_list[:-i]
+ else:
+ components_reversed = components_reversed_list
+
+ for j, next_component in enumerate(components_reversed):
+ next_item = self._get_component_item(next_component)
+ if parent_item == next_item:
+ outer_item_found = True
+ if not parent_item['num_id'] in list_start_stop_index:
+ # We need to find the index of the component from original
+ # self.components list so that we take into account all additional
+ # paragraphs that a list can contain
+ list_start_stop_index[parent_item['num_id']] = {
+ 'start': self.components.index(component),
+ 'stop': self.components.index(next_component)
+ }
+ break
+ if outer_item_found:
+ # j is the position, counted from the end of `components`, at which
+ # the scan above stopped; the slice is what lies in between
+ for _component in components[i + 1:-j - 1]:
+ child_item = self._get_component_item(_component)
+ # We only need to process items that have a different num_id,
+ # which means they are part of a different list
+ if child_item['num_id'] != parent_item['num_id']:
+ # Check if child is not already a parent
+ child_item_children = parent_child_map.get(
+ (child_item['num_id'], child_item['level']), [])
+ if parent_item not in child_item_children:
+ nums.append(child_item)
+ if nums:
+ # Parents are keyed by (num_id, level); children by num_id only
+ parent_key = (parent_item['num_id'], parent_item['level'])
+ if parent_key not in parent_child_map:
+ parent_child_map[parent_key] = []
+
+ for num in nums:
+ child_parent_map[num['num_id']] = parent_item
+ if num not in parent_child_map[parent_key]:
+ parent_child_map[parent_key].append(num)
+
+ self.child_parent_num_map = child_parent_map
+ self.parent_child_num_map = parent_child_map
+ self.list_start_stop_index = list_start_stop_index
+
+ return True
+
+ def has_parent_list(self, paragraph):
+ '''
+ Check if the paragraph is inside a list that was detected as being
+ nested in (but separate from) a parent list.
+
+ Returns False when the paragraph has no numbering properties or no
+ numbering definition, or when there is no current span. Otherwise
+ returns True when the paragraph's num_id has an entry in
+ ``child_parent_num_map``.
+ '''
+
+ if not paragraph.has_numbering_properties or not paragraph.has_numbering_definition:
+ return False
+
+ if not self.current_span:
+ return False
+
+ num_item = self._get_component_item(paragraph)
+
+ return bool(self.child_parent_num_map.get(num_item['num_id'], None))
+
+ def is_parent_of_current_span(self, paragraph):
+ '''
+ Check if the list of the paragraph is a parent of the current span's list.
+
+ :param paragraph: paragraph to check
+ :return: False when the paragraph has no numbering properties or no
+ numbering definition; True when there is no current span; otherwise
+ whether the current span is listed as a child of the paragraph's
+ (num_id, level) in ``parent_child_num_map``
+ '''
+ if not paragraph.has_numbering_properties or not paragraph.has_numbering_definition:
+ return False
+
+ if not self.current_span:
+ return True
+
+ # The map is keyed by tuple and holds dict items, hence the two forms
+ num_item = self._get_component_item(paragraph, to_tuple=True)
+ span_item = self._get_component_item(self.current_span)
+
+ return span_item in self.parent_child_num_map.get(num_item, [])
+
def include_candidate_items_in_current_item(self, new_item_index):
'''
A generator to determine which of the candidate numbering items need to
@@ -224,7 +403,7 @@ def include_candidate_items_in_current_item(self, new_item_index):
# Since we've processed all of the candidate numbering items, reset it
self.candidate_numbering_items = []
- def should_start_new_span(self, paragraph):
+ def should_start_new_span(self, index, paragraph):
'''
If there's not a current span, and the paragraph is a heading
style, do not start a new span.
@@ -235,29 +414,129 @@ def should_start_new_span(self, paragraph):
span, start a new span.
Otherwise, do not start a new span.
'''
+
if self.current_span is None:
return True
+
level = self.get_numbering_level(paragraph)
num_def = None
if level:
num_def = level.parent
+
+ if num_def == self.current_span.numbering_definition:
+ return False
+ elif self.has_parent_list(paragraph):
+ return False
+ elif self.is_parent_of_current_span(paragraph):
+ return False
+ elif self.current_span.is_separate_list:
+ return False
+
+ list_idx = self.list_start_stop_index.get(num_def.abstract_num_id)
+ if list_idx and list_idx['start'] == index:
+ return True
+
return num_def != self.current_span.numbering_definition
- def should_start_new_item(self, paragraph):
+ def should_start_new_item(self, index, paragraph):
'''
If there is not a current span, do not start a new item.
If the paragraph is a heading style, do not start a new item.
- Otherwise, only start a new item if the numbering definition of the
- paragraph matches the numbering definition of the current span.
+ Start new item if:
+ Paragraph is from separate list and inside a span
+ Paragraph is from separate list and is parent of the current span
+ Paragraph level id is greater than 0, which means we are still inside the list
+ Numbering definition of the paragraph matches the numbering definition of the
+ current span.
'''
+
if self.current_span is None:
return False
+
level = self.get_numbering_level(paragraph)
num_def = None
if level:
num_def = level.parent
+
+ if self.has_parent_list(paragraph):
+ return True
+ elif self.is_parent_of_current_span(paragraph):
+ return True
+ else:
+ list_idx = self.list_start_stop_index.get(num_def.abstract_num_id)
+ # For mangled lists we need to make sure that we are not handling
+ # the first element from the list which have level > 0
+ if list_idx and index > list_idx['start']:
+ # We are still in the list
+ if int(level.level_id) > 0:
+ return True
+
return num_def == self.current_span.numbering_definition
+ def add_item_to_span(self, index, current_span=None):
+ '''
+ Add a new item to the current span or the span we specify.
+
+ When ``current_span`` is given it becomes the current span first. The
+ new item becomes the current item and ``index`` is recorded as the
+ current item index.
+ '''
+
+ self.current_span = current_span or self.current_span
+
+ self.current_item = NumberingItem(
+ numbering_span=self.current_span,
+ )
+ self.current_item_index = index
+ self.current_span.append_child(self.current_item)
+
+ def add_new_span_and_item(self, index, level, parent_span=None):
+ '''
+ Open a new span for ``level`` inside the current item and give it a
+ first item.
+
+ The new span is created with ``parent_span`` (default: the current
+ span) as parent, pushed on the span stack and appended to the current
+ item. The new span and its item then become the current span and item,
+ ``index`` is recorded as the current item index and the nested level of
+ the new span is updated.
+ '''
+ parent_span = parent_span or self.current_span
+
+ num_def = level.parent
+
+ next_numbering_span = NumberingSpan(
+ numbering_level=level,
+ numbering_definition=num_def,
+ parent=parent_span,
+ )
+
+ self.numbering_span_stack.append(next_numbering_span)
+ next_numbering_item = NumberingItem(
+ numbering_span=next_numbering_span,
+ )
+
+ next_numbering_span.append_child(next_numbering_item)
+ # NOTE(review): append_child on the item appears to re-assign the parent
+ # of the new span to the current item -- confirm
+ self.current_item.append_child(next_numbering_span)
+ self.current_span = next_numbering_span
+ self.current_item = next_numbering_item
+ self.current_item_index = index
+
+ # Must run after the span has been attached, as it reads the parent
+ self.current_span.inc_nested_level()
+
+ def add_new_span_and_item_lower_level(self, index, level, previous_span=None):
+ '''
+ Go back to a span of a lower level and add a new item to it.
+
+ This is a generator: when no span is left on the stack (the mangled
+ level case) a new span is created and yielded.
+
+ :param index: index recorded for the new item
+ :param level: numbering level of the item being added
+ :param previous_span: span to go back to; looked up with
+ ``find_previous_numbering_span_with_lower_level`` when not given
+ '''
+ num_def = level.parent
+
+ level_id = int(level.level_id)
+
+ if not previous_span:
+ # we need to "subtract" a level. To do that, find the level
+ # that we're going back to, which may not even exist
+ previous_span = self.find_previous_numbering_span_with_lower_level(level_id)
+
+ if self.numbering_span_stack:
+ assert previous_span
+ self.current_span = previous_span
+ else:
+ # If the numbering_span_stack is empty now, it means
+ # we're handling a mangled level case
+ # For that scenario, create a new span
+ self.current_span = NumberingSpan(
+ numbering_level=level,
+ numbering_definition=num_def,
+ parent=self.current_span,
+ )
+ self.numbering_span_stack = [self.current_span]
+ yield self.current_span
+
+ self.add_item_to_span(index)
+
def handle_start_new_span(self, index, paragraph):
level = self.get_numbering_level(paragraph)
num_def = level.parent
@@ -279,11 +558,7 @@ def handle_start_new_span(self, index, paragraph):
self.numbering_span_stack = [self.current_span]
- self.current_item = NumberingItem(
- numbering_span=self.current_span,
- )
- self.current_item_index = index
- self.current_span.append_child(self.current_item)
+ self.add_item_to_span(index)
def handle_start_new_item(self, index, paragraph):
level = self.get_numbering_level(paragraph)
@@ -298,54 +573,37 @@ def handle_start_new_item(self, index, paragraph):
if level == self.current_span.numbering_level:
# The level hasn't changed
- self.current_item = NumberingItem(
- numbering_span=self.current_span,
- )
- self.current_item_index = index
- self.current_span.append_child(self.current_item)
+ self.add_item_to_span(index)
else:
+ has_parent_list = self.has_parent_list(paragraph)
+ is_parent_of_current_span = self.is_parent_of_current_span(paragraph)
+
level_id = int(level.level_id)
current_level_id = int(self.current_span.numbering_level.level_id)
- if level_id > current_level_id:
- # Add a new span + item to hold this new level
- next_numbering_span = NumberingSpan(
- numbering_level=level,
- numbering_definition=num_def,
- parent=self.current_span,
- )
- self.numbering_span_stack.append(next_numbering_span)
- next_numbering_item = NumberingItem(
- numbering_span=next_numbering_span,
- )
- next_numbering_span.children.append(next_numbering_item)
- self.current_item.append_child(next_numbering_span)
- self.current_span = next_numbering_span
- self.current_item = next_numbering_item
- self.current_item_index = index
- elif level_id < current_level_id:
- # we need to "subtract" a level. To do that, find the level
- # that we're going back to, which may not even exist
- previous_span = self.find_previous_numbering_span_with_lower_level(level_id)
- if self.numbering_span_stack:
- assert previous_span
- self.current_span = previous_span
- else:
- # If the numbering_span_stack is empty now, it means
- # we're handling a mangled level case
- # For that scenario, create a new span
- self.current_span = NumberingSpan(
- numbering_level=level,
- numbering_definition=num_def,
- parent=self.current_span,
- )
- self.numbering_span_stack = [self.current_span]
- yield self.current_span
- self.current_item = NumberingItem(
- numbering_span=self.current_span,
- )
- self.current_item_index = index
- self.current_span.append_child(self.current_item)
+ if num_def == self.current_span.numbering_definition:
+ # At this stage we process all the items that are part of the same list.
+ # All item from the same list have same numbering definition
+ if level_id > current_level_id:
+ self.add_new_span_and_item(index, level)
+ elif level_id < current_level_id:
+ for item in self.add_new_span_and_item_lower_level(index, level):
+ yield item
+ else:
+ # Here we deal with lists that separate from the parent list meaning
+ # that have different numbering definition
+ if not has_parent_list and not is_parent_of_current_span:
+ self.current_span = self.find_previous_numbering_span_by_num_def(paragraph)
+ self.current_item = self.current_span.get_last_child()
+ self.add_new_span_and_item(index, level)
+ elif has_parent_list and not is_parent_of_current_span:
+ self.current_span = self.find_parent_numbering_span(paragraph)
+ self.current_item = self.current_span.get_last_child()
+ self.add_new_span_and_item(index, level)
+ self.current_span.is_separate_list = True
+ else:
+ self.current_span = self.find_previous_numbering_span_by_num_def(paragraph)
+ self.add_item_to_span(index)
def find_previous_numbering_span_with_lower_level(self, level_id):
previous_span = None
@@ -358,6 +616,36 @@ def find_previous_numbering_span_with_lower_level(self, level_id):
self.numbering_span_stack.pop()
return previous_span
+ def find_previous_numbering_span_by_num_def(self, paragraph):
+ '''
+ Unwind the span stack down to the span that has the same numbering
+ definition as the paragraph, and return that span.
+
+ NOTE(review): when no span matches, the stack is emptied and the last
+ span that was looked at is returned (None for an empty stack) --
+ confirm that callers can cope with that.
+ '''
+ previous_span = None
+ while self.numbering_span_stack:
+ previous_span = self.numbering_span_stack[-1]
+ if previous_span.numbering_definition == paragraph.numbering_definition:
+ # we found the parent span of the paragraph item
+ break
+ self.numbering_span_stack.pop()
+ return previous_span
+
+ def find_parent_numbering_span(self, paragraph):
+ '''
+ Unwind the span stack down to the span of the paragraph's parent list,
+ as recorded in ``child_parent_num_map``, and return that span.
+
+ Returns None, leaving the stack untouched, when no parent list is
+ recorded for the paragraph's num_id.
+
+ NOTE(review): when no span on the stack matches the parent, the stack
+ is emptied and the last span that was looked at is returned -- confirm.
+ '''
+ previous_span = None
+
+ num_item = self._get_component_item(paragraph)
+
+ parent_num_item = self.child_parent_num_map.get(num_item['num_id'], None)
+ if not parent_num_item:
+ return previous_span
+
+ while self.numbering_span_stack:
+ previous_span = self.numbering_span_stack[-1]
+ previous_span_item = self._get_component_item(previous_span)
+
+ if previous_span_item == parent_num_item:
+ # we found the parent span of the paragraph item
+ break
+ self.numbering_span_stack.pop()
+
+ return previous_span
+
def handle_paragraph(self, index, paragraph):
level = self.get_numbering_level(paragraph)
num_def = None
@@ -378,8 +666,8 @@ def handle_paragraph(self, index, paragraph):
self.candidate_numbering_items.append((index, paragraph))
return
- start_new_span = self.should_start_new_span(paragraph)
- start_new_item = self.should_start_new_item(paragraph)
+ start_new_span = self.should_start_new_span(index, paragraph)
+ start_new_item = self.should_start_new_item(index, paragraph)
if start_new_span:
for item in self.handle_start_new_span(index, paragraph):
@@ -549,8 +837,10 @@ def detect_new_faked_level_started(self, paragraph, current_level_id=None):
def get_left_position_for_numbering_span(self, numbering_span):
paragraph = numbering_span.get_first_child_of_first_item()
+
left_pos = self.get_left_position_for_paragraph(paragraph)
num_level_para_properties = numbering_span.numbering_level.paragraph_properties
+
if num_level_para_properties:
left_pos += num_level_para_properties.start_margin_position
return left_pos
diff --git a/pydocx/openxml/wordprocessing/__init__.py b/pydocx/openxml/wordprocessing/__init__.py
index 02da2556..4fce72a2 100644
--- a/pydocx/openxml/wordprocessing/__init__.py
+++ b/pydocx/openxml/wordprocessing/__init__.py
@@ -1,6 +1,7 @@
# coding: utf-8
from pydocx.openxml.wordprocessing.abstract_num import AbstractNum
from pydocx.openxml.wordprocessing.body import Body
+from pydocx.openxml.wordprocessing.bookmark import Bookmark
from pydocx.openxml.wordprocessing.br import Break
from pydocx.openxml.wordprocessing.deleted_run import DeletedRun
from pydocx.openxml.wordprocessing.deleted_text import DeletedText
@@ -26,6 +27,7 @@
from pydocx.openxml.wordprocessing.picture import Picture
from pydocx.openxml.wordprocessing.run import Run
from pydocx.openxml.wordprocessing.run_properties import RunProperties # noqa
+from pydocx.openxml.wordprocessing.rfonts import RFonts
from pydocx.openxml.wordprocessing.sdt_block import SdtBlock
from pydocx.openxml.wordprocessing.sdt_content_block import SdtContentBlock
from pydocx.openxml.wordprocessing.sdt_content_run import SdtContentRun
@@ -46,6 +48,7 @@
__all__ = [
'AbstractNum',
'Body',
+ 'Bookmark',
'Break',
'DeletedRun',
'DeletedText',
@@ -71,6 +74,7 @@
'Picture',
'Run',
'RunProperties',
+ 'RFonts',
'SdtBlock',
'SdtContentBlock',
'SdtContentRun',
diff --git a/pydocx/openxml/wordprocessing/abstract_num.py b/pydocx/openxml/wordprocessing/abstract_num.py
index 98b0727f..cdad313e 100644
--- a/pydocx/openxml/wordprocessing/abstract_num.py
+++ b/pydocx/openxml/wordprocessing/abstract_num.py
@@ -27,3 +27,20 @@ def __init__(self, **kwargs):
def get_level(self, level_id):
return self._levels.get(level_id)
+
+ def get_indentation_between_levels(self):
+ """
+ Return the left-indentation step between two consecutive levels.
+
+ Depending on the word version we may get a different default
+ indentation between levels, so the step is read from the definition
+ itself. Only the first 2 levels are checked, as the others follow the
+ same step.
+
+ :return: ``indentation_left`` of level 1 minus that of level 0, or 720
+ when it cannot be read from the first two levels
+ """
+
+ try:
+ lvl0_ind = self.levels[0].paragraph_properties.to_int('indentation_left',
+ default=0)
+ lvl1_ind = self.levels[1].paragraph_properties.to_int('indentation_left',
+ default=0)
+ ind_step = lvl1_ind - lvl0_ind
+ except (IndexError, AttributeError):
+ # IndexError: fewer than two levels are defined.
+ # AttributeError: a level has no paragraph properties to read from;
+ # without this the whole export would fail on such a definition.
+ ind_step = 720 # default one
+
+ return ind_step
diff --git a/pydocx/openxml/wordprocessing/bookmark.py b/pydocx/openxml/wordprocessing/bookmark.py
new file mode 100644
index 00000000..1e7bf417
--- /dev/null
+++ b/pydocx/openxml/wordprocessing/bookmark.py
@@ -0,0 +1,14 @@
+# coding: utf-8
+from __future__ import (
+ absolute_import,
+ print_function,
+ unicode_literals,
+)
+
+from pydocx.models import XmlModel, XmlAttribute
+
+
+class Bookmark(XmlModel):
+ """Model for the ``bookmarkStart`` element, exposing its ``name`` attribute."""
+ XML_TAG = 'bookmarkStart'
+
+ name = XmlAttribute(name='name')
diff --git a/pydocx/openxml/wordprocessing/paragraph.py b/pydocx/openxml/wordprocessing/paragraph.py
index af59dd7b..bdb6b387 100644
--- a/pydocx/openxml/wordprocessing/paragraph.py
+++ b/pydocx/openxml/wordprocessing/paragraph.py
@@ -4,7 +4,7 @@
print_function,
unicode_literals,
)
-
+from pydocx.util.memoize import memoized
from pydocx.models import XmlModel, XmlCollection, XmlChild
from pydocx.openxml.wordprocessing.hyperlink import Hyperlink
from pydocx.openxml.wordprocessing.paragraph_properties import ParagraphProperties # noqa
@@ -16,6 +16,7 @@
from pydocx.openxml.wordprocessing.deleted_run import DeletedRun
from pydocx.openxml.wordprocessing.sdt_run import SdtRun
from pydocx.openxml.wordprocessing.simple_field import SimpleField
+from pydocx.openxml.wordprocessing.bookmark import Bookmark
class Paragraph(XmlModel):
@@ -31,6 +32,7 @@ class Paragraph(XmlModel):
DeletedRun,
SdtRun,
SimpleField,
+ Bookmark
)
def __init__(self, **kwargs):
@@ -45,6 +47,10 @@ def effective_properties(self):
self._effective_properties = properties
return self._effective_properties
+ @property
+ def numbering_definition(self):
+ """Property shortcut for ``get_numbering_definition()``."""
+ return self.get_numbering_definition()
+
def has_structured_document_parent(self):
from pydocx.openxml.wordprocessing import SdtBlock
return self.has_ancestor(SdtBlock)
@@ -83,9 +89,8 @@ def heading_style(self):
def heading_style(self, style):
self._heading_style = style
+ @memoized
def get_numbering_definition(self):
- # TODO add memoization
-
# TODO the getattr is necessary because of footnotes. From the context
# of a footnote, a paragraph's container is the footnote part, which
# doesn't have access to the numbering_definitions_part
@@ -101,8 +106,8 @@ def get_numbering_definition(self):
num_id=numbering_properties.num_id,
)
+ @memoized
def get_numbering_level(self):
- # TODO add memoization
numbering_definition = self.get_numbering_definition()
if not numbering_definition:
return
@@ -121,6 +126,12 @@ def runs(self):
if isinstance(p_child, Run):
yield p_child
+ @property
+ def bookmark_name(self):
+ '''
+ Return the name of the first Bookmark found among this paragraph's
+ children. Falls through and returns None (implicitly) when no child is
+ a Bookmark.
+ '''
+ for p_child in self.children:
+ if isinstance(p_child, Bookmark):
+ return p_child.name
+
def get_text(self, tab_char=None):
'''
Return a string of all of the contained Text nodes concatenated
@@ -168,3 +179,30 @@ def get_number_of_initial_tabs(self):
else:
break
return tab_count
+
+ @property
+ @memoized
+ def has_numbering_properties(self):
+ '''
+ True when self.properties has a truthy ``numbering_properties`` value.
+ False otherwise, including when self.properties is None (the getattr
+ default of None is used in that case).
+ '''
+ return bool(getattr(self.properties, 'numbering_properties', None))
+
+ @property
+ @memoized
+ def has_numbering_definition(self):
+ '''
+ True when the ``numbering_definition`` property yields a truthy value.
+ '''
+ return bool(self.numbering_definition)
+
+    def get_indentation(self, indentation, only_level_ind=False):
+        '''
+        Get specific indentation of the current paragraph. If indentation is
+        not present on the paragraph level, get it from the numbering
+        definition.
+
+        ``indentation`` is the attribute name handed to ``to_int`` on the
+        properties objects. When ``only_level_ind`` is true the paragraph's
+        own value is skipped and only the numbering level is consulted.
+
+        Returns None when the paragraph has no properties, and 0 when the
+        numbering level cannot supply a value.
+        '''
+
+        ind = None
+
+        if self.properties:
+            if not only_level_ind:
+                ind = self.properties.to_int(indentation)
+            if ind is None:
+                # get_numbering_level() returns None when the paragraph has
+                # no numbering definition; guard against that (and against a
+                # level without paragraph properties) instead of raising
+                # AttributeError, using the same fallback value of 0.
+                level = self.get_numbering_level()
+                level_properties = getattr(level, 'paragraph_properties', None)
+                if level_properties is None:
+                    ind = 0
+                else:
+                    ind = level_properties.to_int(indentation, default=0)
+
+        return ind
diff --git a/pydocx/openxml/wordprocessing/paragraph_properties.py b/pydocx/openxml/wordprocessing/paragraph_properties.py
index d893bd7b..c6bbc374 100644
--- a/pydocx/openxml/wordprocessing/paragraph_properties.py
+++ b/pydocx/openxml/wordprocessing/paragraph_properties.py
@@ -35,11 +35,19 @@ def start_margin_position(self):
# ignored.
start_margin = 0
if self.indentation_left:
- start_margin += int(self.indentation_left)
+ start_margin += int(float(self.indentation_left))
if self.indentation_hanging:
- start_margin -= int(self.indentation_hanging)
+ start_margin -= int(float(self.indentation_hanging))
elif self.indentation_first_line:
- start_margin += int(self.indentation_first_line)
+ start_margin += int(float(self.indentation_first_line))
if start_margin:
return start_margin
return 0
+
+    def to_int(self, attribute, default=None):
+        '''
+        Return the named attribute converted to an integer, or ``default``
+        when the attribute is missing, None or not numeric.
+
+        Decimal strings such as "172.5" are truncated toward zero, matching
+        the int(float(...)) conversion used by start_margin_position.
+        '''
+        # TODO would be nice if this integer conversion was handled
+        # implicitly by the model somehow
+        try:
+            return int(float(getattr(self, attribute, default)))
+        except (ValueError, TypeError, OverflowError):
+            # OverflowError: float() accepts "inf" but int() rejects it.
+            return default
diff --git a/pydocx/openxml/wordprocessing/rfonts.py b/pydocx/openxml/wordprocessing/rfonts.py
new file mode 100644
index 00000000..ea95216e
--- /dev/null
+++ b/pydocx/openxml/wordprocessing/rfonts.py
@@ -0,0 +1,25 @@
+# coding: utf-8
+from __future__ import (
+ absolute_import,
+ print_function,
+ unicode_literals,
+)
+
+from pydocx.models import XmlModel, XmlAttribute
+
+
+class RFonts(XmlModel):
+ '''
+ Model for the ``rFonts`` XML element, exposing its attributes.
+ '''
+ XML_TAG = 'rFonts'
+
+ hint = XmlAttribute(name='hint')
+ ascii = XmlAttribute(name='ascii')
+ h_ansi = XmlAttribute(name='hAnsi')
+ east_asia = XmlAttribute(name='eastAsia')
+ cs = XmlAttribute(name='cs')
+ ascii_theme = XmlAttribute(name='asciiTheme')
+ h_ansi_theme = XmlAttribute(name='hAnsiTheme')
+ east_asia_theme = XmlAttribute(name='eastAsiaTheme')
+ # NOTE: unlike the other *Theme attributes, this XML attribute name is
+ # written all-lowercase ('cstheme'); presumably this follows the schema's
+ # spelling - confirm before "fixing" the case.
+ cs_theme = XmlAttribute(name='cstheme')
+
+ def is_symbol(self):
+ '''
+ Return True when the ``hAnsi`` attribute is exactly 'Symbol'.
+ '''
+ return self.h_ansi == 'Symbol'
diff --git a/pydocx/openxml/wordprocessing/run_properties.py b/pydocx/openxml/wordprocessing/run_properties.py
index 46867e65..63587a57 100644
--- a/pydocx/openxml/wordprocessing/run_properties.py
+++ b/pydocx/openxml/wordprocessing/run_properties.py
@@ -7,6 +7,7 @@
from pydocx.models import XmlModel, XmlChild
from pydocx.types import OnOff, Underline
+from pydocx.openxml.wordprocessing.rfonts import RFonts
class RunProperties(XmlModel):
@@ -26,6 +27,7 @@ class RunProperties(XmlModel):
pos = XmlChild(name='position', attrname='val')
sz = XmlChild(name='sz', attrname='val')
clr = XmlChild(name='color', attrname='val')
+ r_fonts = XmlChild(type=RFonts)
@property
def color(self):
diff --git a/setup.py b/setup.py
index 9a17e299..74b40964 100644
--- a/setup.py
+++ b/setup.py
@@ -42,7 +42,7 @@ def main():
author_email="jason.louard.ward@gmail.com, samson91787@gmail.com",
url="http://github.com/CenterForOpenScience/pydocx",
platforms=["any"],
- license="BSD",
+ license="Apache",
packages=find_packages(),
package_data={
'pydocx': [
@@ -64,7 +64,7 @@ def main():
"Programming Language :: Python :: 3.4",
"Programming Language :: Python :: Implementation :: PyPy",
"Intended Audience :: Developers",
- "License :: OSI Approved :: BSD License",
+ "License :: OSI Approved :: Apache Software License",
"Operating System :: OS Independent",
"Topic :: Text Processing :: Markup :: HTML",
"Topic :: Text Processing :: Markup :: XML",
diff --git a/tests/export/html/test_heading.py b/tests/export/html/test_heading.py
index efd7ab1c..b77dfc2d 100644
--- a/tests/export/html/test_heading.py
+++ b/tests/export/html/test_heading.py
@@ -744,3 +744,30 @@ def test_single_lvl_list_has_precedence_over_headings(self):
'''
self.assert_document_generates_html(document, expected_html)
+
+ def test_heading_with_bookmark(self):
+ document_xml = '''
+
+
+
+
+
+
+
+ aaa
+
+
+ '''
+
+ style_xml = '''
+
+ '''
+
+ document = WordprocessingDocumentFactory()
+ document.add(StyleDefinitionsPart, style_xml)
+ document.add(MainDocumentPart, document_xml)
+
+ expected_html = 'aaa
'
+ self.assert_document_generates_html(document, expected_html)
diff --git a/tests/export/html/test_hyperlink.py b/tests/export/html/test_hyperlink.py
index a88ab748..dbbe4a0c 100644
--- a/tests/export/html/test_hyperlink.py
+++ b/tests/export/html/test_hyperlink.py
@@ -194,3 +194,21 @@ def test_with_anchor(self):
expected_html = 'link.
'
self.assert_document_generates_html(document, expected_html)
+
+ def test_internal_link(self):
+ document_xml = '''
+
+
+
+ link
+
+
+
+ '''
+
+ document = WordprocessingDocumentFactory()
+
+ document.add(MainDocumentPart, document_xml)
+
+ expected_html = 'link
'
+ self.assert_document_generates_html(document, expected_html)
diff --git a/tests/export/html/test_numbering.py b/tests/export/html/test_numbering.py
index 8386499c..b41dc9de 100644
--- a/tests/export/html/test_numbering.py
+++ b/tests/export/html/test_numbering.py
@@ -33,6 +33,19 @@ class NumberingTestBase(object):
'''
+ simple_list_item_with_indentation = '''
+
+
+
+
+
+
+
+
+ {content}
+
+ '''
+
simple_list_definition = '''
@@ -1114,6 +1127,493 @@ def test_root_level_numfmt_None_with_sublist(self):
self.assert_document_generates_html(document, expected_html)
+class NumberingIndentationTestCase(NumberingTestBase, DocumentGeneratorTestCase):
+ def test_no_numbering_definition_defined(self):
+ document_xml = '''
+ {aaa}
+ {bbb}
+ {ccc}
+ '''.format(
+ aaa=self.simple_list_item.format(
+ content='AAA',
+ num_id=1,
+ ilvl=0,
+ ),
+ bbb=self.simple_list_item.format(
+ content='BBB',
+ num_id=1,
+ ilvl=1,
+ ),
+ ccc=self.simple_list_item.format(
+ content='CCC',
+ num_id=1,
+ ilvl=2,
+ ),
+ )
+
+ document = WordprocessingDocumentFactory()
+ document.add(MainDocumentPart, document_xml)
+
+ expected_html = '''
+ AAA
+ BBB
+ CCC
+ '''
+ self.assert_document_generates_html(document, expected_html)
+
+ def test_default_indentation(self):
+ document_xml = '''
+ {aaa}
+ {bbb}
+ {ccc}
+ '''.format(
+ aaa=self.simple_list_item.format(
+ content='AAA',
+ num_id=1,
+ ilvl=0,
+ ),
+ bbb=self.simple_list_item.format(
+ content='BBB',
+ num_id=1,
+ ilvl=1,
+ ),
+ ccc=self.simple_list_item.format(
+ content='CCC',
+ num_id=1,
+ ilvl=2,
+ ),
+ )
+
+ numbering_xml = '''
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ '''
+
+ document = WordprocessingDocumentFactory()
+ document.add(NumberingDefinitionsPart, numbering_xml)
+ document.add(MainDocumentPart, document_xml)
+
+ expected_html = '''
+
+ - AAA
+
+ - BBB
+
+ - CCC
+
+
+
+
+
+ '''
+ self.assert_document_generates_html(document, expected_html)
+
+ def test_custom_indentation(self):
+ document_xml = '''
+ {aaa}
+ {bbb}
+ {ccc}
+ '''.format(
+ aaa=self.simple_list_item_with_indentation.format(
+ content='AAA',
+ num_id=1,
+ ilvl=0,
+ ind='left="1440" hanging="360"'
+ ),
+ bbb=self.simple_list_item_with_indentation.format(
+ content='BBB',
+ num_id=1,
+ ilvl=1,
+ ind='left="2880" hanging="360"'
+ ),
+ ccc=self.simple_list_item_with_indentation.format(
+ content='CCC',
+ num_id=1,
+ ilvl=2,
+ ind='left="4320" hanging="360"'
+ ),
+ )
+
+ numbering_xml = '''
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ '''
+
+ document = WordprocessingDocumentFactory()
+ document.add(NumberingDefinitionsPart, numbering_xml)
+ document.add(MainDocumentPart, document_xml)
+
+ expected_html = '''
+
+ - AAA
+
+ - BBB
+
+ - CCC
+
+
+
+
+
+ '''
+ self.assert_document_generates_html(document, expected_html)
+
+ def test_custom_hanging_indentation(self):
+ document_xml = '''
+ {aaa}
+ {bbb}
+ {ccc}
+ '''.format(
+ aaa=self.simple_list_item_with_indentation.format(
+ content='AAA',
+ num_id=1,
+ ilvl=0,
+ ind='left="720" hanging="500"'
+ ),
+ bbb=self.simple_list_item_with_indentation.format(
+ content='BBB',
+ num_id=1,
+ ilvl=1,
+ ind='left="1440" hanging="700"'
+ ),
+ ccc=self.simple_list_item_with_indentation.format(
+ content='CCC',
+ num_id=1,
+ ilvl=2,
+ ind='left="2160" hanging="800"'
+ ),
+ )
+
+ numbering_xml = '''
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ '''
+
+ document = WordprocessingDocumentFactory()
+ document.add(NumberingDefinitionsPart, numbering_xml)
+ document.add(MainDocumentPart, document_xml)
+
+ expected_html = '''
+
+ -
+ AAA
+
+ -
+ BBB
+
+ -
+ CCC
+
+
+
+
+
+
+
+ '''
+ self.assert_document_generates_html(document, expected_html)
+
+ def test_custom_first_line_indentation(self):
+ document_xml = '''
+ {aaa}
+ {bbb}
+ {ccc}
+ '''.format(
+ aaa=self.simple_list_item_with_indentation.format(
+ content='AAA',
+ num_id=1,
+ ilvl=0,
+ ind='firstLine="360"'
+ ),
+ bbb=self.simple_list_item_with_indentation.format(
+ content='BBB',
+ num_id=1,
+ ilvl=1,
+ ind='firstLine="360"'
+ ),
+ ccc=self.simple_list_item_with_indentation.format(
+ content='CCC',
+ num_id=1,
+ ilvl=2,
+ ind='firstLine="360"'
+ ),
+ )
+
+ numbering_xml = '''
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ '''
+
+ document = WordprocessingDocumentFactory()
+ document.add(NumberingDefinitionsPart, numbering_xml)
+ document.add(MainDocumentPart, document_xml)
+
+ expected_html = '''
+
+ - AAA
+
+ - BBB
+
+ - CCC
+
+
+
+
+
+ '''
+ self.assert_document_generates_html(document, expected_html)
+
+ def test_nested_separated_lists(self):
+ document_xml = '''
+ {aaa}
+ {bbb}
+ {ccc}
+ {ddd}
+ '''.format(
+ aaa=self.simple_list_item.format(
+ content='AAA',
+ num_id=1,
+ ilvl=0
+ ),
+ bbb=self.simple_list_item.format(
+ content='BBB',
+ num_id=1,
+ ilvl=1,
+ ),
+ ccc=self.simple_list_item.format(
+ content='CCC',
+ num_id=2,
+ ilvl=0,
+ ),
+ ddd=self.simple_list_item.format(
+ content='DDD',
+ num_id=1,
+ ilvl=1,
+ ),
+ )
+
+ numbering_xml = '''
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ '''
+
+ document = WordprocessingDocumentFactory()
+ document.add(NumberingDefinitionsPart, numbering_xml)
+ document.add(MainDocumentPart, document_xml)
+
+ expected_html = '''
+
+ -
+ AAA
+
+ -
+ BBB
+
+ - CCC
+
+
+ - DDD
+
+
+
+ '''
+ self.assert_document_generates_html(document, expected_html)
+
+ def test_nested_separated_lists_different_level(self):
+ document_xml = '''
+ {aaa}
+ {bbb}
+ {ccc}
+ {ddd}
+ '''.format(
+ aaa=self.simple_list_item.format(
+ content='AAA',
+ num_id=1,
+ ilvl=0
+ ),
+ bbb=self.simple_list_item.format(
+ content='BBB',
+ num_id=2,
+ ilvl=1,
+ ),
+ ccc=self.simple_list_item.format(
+ content='CCC',
+ num_id=2,
+ ilvl=1,
+ ),
+ ddd=self.simple_list_item.format(
+ content='DDD',
+ num_id=1,
+ ilvl=0,
+ ),
+ )
+
+ numbering_xml = '''
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ '''
+
+ document = WordprocessingDocumentFactory()
+ document.add(NumberingDefinitionsPart, numbering_xml)
+ document.add(MainDocumentPart, document_xml)
+
+ expected_html = '''
+
+ -
+ AAA
+
+ - BBB
+ - CCC
+
+
+ - DDD
+
+ '''
+ self.assert_document_generates_html(document, expected_html)
+
+
class FakedNumberingManyItemsTestCase(NumberingTestBase, DocumentGeneratorTestCase):
def assert_html(self, list_type, digit_generator):
paragraphs = []
@@ -1386,7 +1886,7 @@ def test_real_nested_list_continuation_fake_nested_list_using_indentation(self):
expected_html = '''
- - AAA
+
- AAA
- BBB
- CCC
@@ -1445,7 +1945,7 @@ def test_real_nested_list_continuation_fake_list_using_indentation(self):
expected_html = '''
- - AAA
+
- AAA
- BBB
diff --git a/tests/export/mixins/test_faked_superscript_and_subscript.py b/tests/export/mixins/test_faked_superscript_and_subscript.py
index a0e6cce9..53fffa3f 100644
--- a/tests/export/mixins/test_faked_superscript_and_subscript.py
+++ b/tests/export/mixins/test_faked_superscript_and_subscript.py
@@ -8,12 +8,12 @@
from pydocx.export.html import PyDocXHTMLExporter
from pydocx.export.mixins import FakedSuperscriptAndSubscriptExportMixin
+from pydocx.openxml.packaging import MainDocumentPart, StyleDefinitionsPart
from pydocx.test import DocumentGeneratorTestCase, DocXFixtureTestCaseFactory
from pydocx.test.utils import (
PyDocXHTMLExporterNoStyle,
WordprocessingDocumentFactory,
)
-from pydocx.openxml.packaging import MainDocumentPart, StyleDefinitionsPart
class FakedSuperscriptAndSubscriptHTMLExporterNoStyle(
@@ -342,4 +342,5 @@ class DocXFixtureTestCase(DocXFixtureTestCaseFactory):
'fake_superscript',
)
+
DocXFixtureTestCase.generate()
diff --git a/tests/export/test_docx.py b/tests/export/test_docx.py
index 8ebe3f47..16fa43d1 100644
--- a/tests/export/test_docx.py
+++ b/tests/export/test_docx.py
@@ -32,11 +32,14 @@ class ConvertDocxToHtmlTestCase(DocXFixtureTestCaseFactory):
'has_title',
'inline_tags',
'justification',
+ 'paragraph_with_margins',
'list_in_table',
+ 'lists_with_margins',
'lists_with_styles',
'missing_numbering',
'missing_style',
'nested_lists',
+ 'nested_lists_different_num_ids',
'nested_table_rowspan',
'nested_tables',
'no_break_hyphen',
diff --git a/tests/export/test_numbering_span.py b/tests/export/test_numbering_span.py
index cabcccd6..ea2cc7a7 100644
--- a/tests/export/test_numbering_span.py
+++ b/tests/export/test_numbering_span.py
@@ -5,23 +5,31 @@
unicode_literals,
)
-
+import sys
from unittest import TestCase
from pydocx.export.numbering_span import NumberingSpanBuilder
from pydocx.openxml.wordprocessing import (
Break,
Paragraph,
+ ParagraphProperties,
+ NumberingProperties,
Run,
TabChar,
Text,
+ Numbering
)
+from pydocx.util.xml import parse_xml_from_string
class NumberingSpanTestBase(TestCase):
def setUp(self):
self.builder = NumberingSpanBuilder()
+ def _load_from_xml(self, xml):
+ root = parse_xml_from_string(xml)
+ return Numbering.load(root)
+
class CleanParagraphTestCase(NumberingSpanTestBase):
def test_empty_paragraph(self):
@@ -370,3 +378,381 @@ def test_only_tabs_before_first_text_are_removed(self):
self.builder.remove_initial_tab_chars_from_paragraph(paragraph)
self.assertEqual(repr(paragraph), repr(expected))
+
+
+class DetectParentChildMapTestCase(NumberingSpanTestBase):
+ def setUp(self):
+ pass
+
+ def assertDictEqual(self, d1, d2, msg=None):
+ if sys.version_info >= (2, 7):
+ super(DetectParentChildMapTestCase, self).assertDictEqual(d1, d2, msg=msg)
+ else:
+ if d1 != d2:
+ raise AssertionError("Dicts do not match: %s" % msg)
+
+ def create_container(self):
+ xml = '''
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ '''
+
+ numbering = self._load_from_xml(xml)
+
+ container = type(
+ str('Container'),
+ (object,),
+ {
+ 'numbering_definitions_part': type(str('Numbering'), (Numbering,),
+ {'numbering': numbering})
+ }
+ )
+
+ return container
+
+ def create_numbering_paragraph(self, num_id, level_id='0', container=True):
+ paragraph_params = {
+ 'properties': ParagraphProperties(
+ numbering_properties=NumberingProperties(
+ num_id=num_id,
+ level_id=level_id
+ )
+ )
+ }
+
+ if container:
+ paragraph_params['container'] = self.create_container()
+
+ return Paragraph(**paragraph_params)
+
+ def test_no_components_on_init(self):
+ builder = NumberingSpanBuilder()
+ result = builder.detect_parent_child_map_for_items()
+
+ self.assertEqual(builder.child_parent_num_map, {})
+ self.assertEqual(builder.parent_child_num_map, {})
+ self.assertFalse(result)
+
+ def test_invalid_input_components(self):
+ components = [
+ Paragraph(),
+ Paragraph(children=[
+ Run(children=[
+ TabChar(),
+ ])
+ ]),
+ Paragraph(
+ properties=ParagraphProperties()
+ ),
+ Paragraph(
+ properties=ParagraphProperties(
+ numbering_properties=NumberingProperties()
+ )
+ ),
+ self.create_numbering_paragraph('1', '0', container=False),
+ ]
+
+ builder = NumberingSpanBuilder(components)
+ result = builder.detect_parent_child_map_for_items()
+
+ self.assertEqual(builder.parent_child_num_map, {})
+ self.assertEqual(builder.child_parent_num_map, {})
+ self.assertFalse(result)
+
+ def test_valid_input_components_but_no_sublists_found(self):
+ components = [
+ Paragraph(),
+ self.create_numbering_paragraph('1', '0'),
+ self.create_numbering_paragraph('1', '0'),
+ self.create_numbering_paragraph('2', '0'),
+ self.create_numbering_paragraph('2', '0'),
+ self.create_numbering_paragraph('3', '0'),
+ self.create_numbering_paragraph('3', '0'),
+ ]
+
+ list_start_stop_index = {
+ '1': {'start': 1, 'stop': 2},
+ '2': {'start': 3, 'stop': 4},
+ '3': {'start': 5, 'stop': 6},
+ }
+
+ builder = NumberingSpanBuilder(components)
+ result = builder.detect_parent_child_map_for_items()
+
+ self.assertDictEqual(builder.parent_child_num_map, {})
+ self.assertDictEqual(builder.child_parent_num_map, {})
+ self.assertDictEqual(builder.list_start_stop_index, list_start_stop_index)
+ self.assertTrue(result)
+
+ def test_sublist_found(self):
+ components = [
+ Paragraph(),
+ self.create_numbering_paragraph('1', '0'),
+ self.create_numbering_paragraph('2', '0'),
+ self.create_numbering_paragraph('2', '0'),
+ self.create_numbering_paragraph('1', '0'),
+ ]
+
+ builder = NumberingSpanBuilder(components)
+ result = builder.detect_parent_child_map_for_items()
+
+ parent_items = {
+ ('1', '0'):
+ [
+ {'num_id': '2', 'level': '0'},
+ ]
+ }
+ child_item = {
+ '2': {'num_id': '1', 'level': '0'}
+ }
+
+ list_start_stop_index = {
+ '1': {'start': 1, 'stop': 4},
+ '2': {'start': 2, 'stop': 3},
+ }
+
+ self.assertDictEqual(builder.parent_child_num_map, parent_items)
+ self.assertDictEqual(builder.child_parent_num_map, child_item)
+ self.assertEqual(builder.list_start_stop_index, list_start_stop_index)
+ self.assertTrue(result)
+
+ def test_nested_sublist_found(self):
+ components = [
+ self.create_numbering_paragraph('1', '0'),
+ self.create_numbering_paragraph('2', '0'),
+ self.create_numbering_paragraph('3', '0'),
+ self.create_numbering_paragraph('3', '0'),
+ self.create_numbering_paragraph('2', '0'),
+ self.create_numbering_paragraph('1', '0'),
+ ]
+
+ builder = NumberingSpanBuilder(components)
+ result = builder.detect_parent_child_map_for_items()
+
+ parent_items = {
+ ('1', '0'):
+ [
+ {'num_id': '2', 'level': '0'},
+ {'num_id': '3', 'level': '0'},
+ ],
+ ('2', '0'):
+ [
+ {'num_id': '3', 'level': '0'},
+ ]
+ }
+ child_item = {
+ '2': {'num_id': '1', 'level': '0'},
+ '3': {'num_id': '2', 'level': '0'},
+ }
+
+ list_start_stop_index = {
+ '1': {'start': 0, 'stop': 5},
+ '2': {'start': 1, 'stop': 4},
+ '3': {'start': 2, 'stop': 3},
+ }
+
+ self.assertDictEqual(builder.parent_child_num_map, parent_items)
+ self.assertDictEqual(builder.child_parent_num_map, child_item)
+ self.assertDictEqual(builder.list_start_stop_index, list_start_stop_index)
+ self.assertTrue(result)
+
+ def test_nested_sublist_not_wrapped_in_parent_item(self):
+ components = [
+ self.create_numbering_paragraph('1', '0'),
+ self.create_numbering_paragraph('2', '0'),
+ self.create_numbering_paragraph('3', '0'),
+ self.create_numbering_paragraph('3', '0'),
+ self.create_numbering_paragraph('1', '0'),
+ ]
+
+ builder = NumberingSpanBuilder(components)
+ result = builder.detect_parent_child_map_for_items()
+
+ parent_items = {
+ ('1', '0'):
+ [
+ {'num_id': '2', 'level': '0'},
+ {'num_id': '3', 'level': '0'},
+ ]
+ }
+ child_item = {
+ '2': {'num_id': '1', 'level': '0'},
+ '3': {'num_id': '1', 'level': '0'},
+ }
+
+ list_start_stop_index = {
+ '1': {'start': 0, 'stop': 4},
+ '2': {'start': 1, 'stop': 1},
+ '3': {'start': 2, 'stop': 3},
+ }
+
+ self.assertDictEqual(builder.parent_child_num_map, parent_items)
+ self.assertDictEqual(builder.child_parent_num_map, child_item)
+ self.assertDictEqual(builder.list_start_stop_index, list_start_stop_index)
+ self.assertTrue(result)
+
+ def test_nested_sublist_parent_with_different_level(self):
+ components = [
+ self.create_numbering_paragraph('1', '0'),
+ self.create_numbering_paragraph('1', '1'),
+ self.create_numbering_paragraph('3', '0'),
+ Paragraph(),
+ self.create_numbering_paragraph('3', '0'),
+ self.create_numbering_paragraph('1', '1'),
+ self.create_numbering_paragraph('2', '0'),
+ self.create_numbering_paragraph('2', '1'),
+ self.create_numbering_paragraph('4', '0'),
+ self.create_numbering_paragraph('4', '0'),
+ self.create_numbering_paragraph('1', '0'),
+ Paragraph(),
+ ]
+
+ builder = NumberingSpanBuilder(components)
+ result = builder.detect_parent_child_map_for_items()
+
+ parent_items = {
+ ('1', '0'):
+ [
+ {'num_id': '3', 'level': '0'},
+ {'num_id': '2', 'level': '0'},
+ {'num_id': '2', 'level': '1'},
+ {'num_id': '4', 'level': '0'},
+ ],
+ ('1', '1'):
+ [
+ {'num_id': '3', 'level': '0'},
+ ]
+ }
+ child_item = {
+ '2': {'num_id': '1', 'level': '0'},
+ '3': {'num_id': '1', 'level': '1'},
+ '4': {'num_id': '1', 'level': '0'},
+ }
+
+ list_start_stop_index = {
+ '1': {'start': 0, 'stop': 10},
+ '3': {'start': 2, 'stop': 4},
+ '2': {'start': 6, 'stop': 6},
+ '4': {'start': 8, 'stop': 9},
+ }
+
+ self.assertDictEqual(builder.parent_child_num_map, parent_items)
+ self.assertDictEqual(builder.child_parent_num_map, child_item)
+ self.assertDictEqual(builder.list_start_stop_index, list_start_stop_index)
+ self.assertTrue(result)
+
+ def test_nested_sublist_multiple_levels(self):
+ components = [
+ self.create_numbering_paragraph('1', '0'),
+ self.create_numbering_paragraph('1', '1'),
+ self.create_numbering_paragraph('2', '0'),
+ self.create_numbering_paragraph('3', '0'),
+ self.create_numbering_paragraph('4', '0'),
+ self.create_numbering_paragraph('3', '0'),
+ self.create_numbering_paragraph('2', '0'),
+ self.create_numbering_paragraph('1', '1'),
+ self.create_numbering_paragraph('1', '0'),
+ ]
+
+ builder = NumberingSpanBuilder(components)
+ result = builder.detect_parent_child_map_for_items()
+
+ parent_items = {
+ ('1', '0'):
+ [
+ {'num_id': '2', 'level': '0'},
+ {'num_id': '3', 'level': '0'},
+ {'num_id': '4', 'level': '0'},
+ ],
+ ('1', '1'):
+ [
+ {'num_id': '2', 'level': '0'},
+ {'num_id': '3', 'level': '0'},
+ {'num_id': '4', 'level': '0'},
+ ],
+ ('2', '0'):
+ [
+ {'num_id': '3', 'level': '0'},
+ {'num_id': '4', 'level': '0'},
+ ],
+ ('3', '0'):
+ [
+ {'num_id': '4', 'level': '0'},
+ ]
+ }
+ child_item = {
+ '2': {'num_id': '1', 'level': '1'},
+ '3': {'num_id': '2', 'level': '0'},
+ '4': {'num_id': '3', 'level': '0'},
+ }
+
+ self.assertDictEqual(builder.parent_child_num_map, parent_items)
+ self.assertDictEqual(builder.child_parent_num_map, child_item)
+ self.assertTrue(result)
+
+ def test_nested_sublist_parent_contains_child_and_child_parent(self):
+ components = [
+ self.create_numbering_paragraph('1', '0'),
+ self.create_numbering_paragraph('2', '1'),
+ self.create_numbering_paragraph('2', '1'),
+ self.create_numbering_paragraph('1', '0'),
+ self.create_numbering_paragraph('2', '1'),
+ self.create_numbering_paragraph('2', '2'),
+ ]
+
+ builder = NumberingSpanBuilder(components)
+ result = builder.detect_parent_child_map_for_items()
+
+ parent_items = {
+ ('1', '0'):
+ [
+ {'num_id': '2', 'level': '1'},
+ ]
+ }
+ child_item = {
+ '2': {'num_id': '1', 'level': '0'},
+ }
+
+ self.assertDictEqual(builder.parent_child_num_map, parent_items)
+ self.assertDictEqual(builder.child_parent_num_map, child_item)
+ self.assertTrue(result)
diff --git a/tests/export/test_xml.py b/tests/export/test_xml.py
index 7d81f3fd..2ad17a88 100644
--- a/tests/export/test_xml.py
+++ b/tests/export/test_xml.py
@@ -461,27 +461,29 @@ def get_xml(self):
return xml
-class SeperateListsTestCase(TranslationTestCase):
+class SeperateListsIntoParentListTestCase(TranslationTestCase):
expected_output = '''
- - AAA
-
-
- - BBB
-
-
- - CCC
+ -
+ AAA
+
+ - BBB
+ - CCC
+
+
+ - DDD
'''
def get_xml(self):
tags = [
DXB.li(text='AAA', ilvl=0, numId=2),
- # Because AAA and CCC are part of the same list (same list id)
- # and BBB is different, these need to be split into three
- # lists (or lose everything from BBB and after.
+ # Because AAA and DDD are part of the same list (same list id)
+ # and BBB, CCC belong to a different one, they must be rendered as a
+ # single list in which BBB and CCC form a nested list under the AAA item.
DXB.li(text='BBB', ilvl=0, numId=1),
- DXB.li(text='CCC', ilvl=0, numId=2),
+ DXB.li(text='CCC', ilvl=0, numId=1),
+ DXB.li(text='DDD', ilvl=0, numId=2),
]
body = b''
for el in tags:
diff --git a/tests/fixtures/lists_with_margins.docx b/tests/fixtures/lists_with_margins.docx
new file mode 100644
index 00000000..a0db187c
Binary files /dev/null and b/tests/fixtures/lists_with_margins.docx differ
diff --git a/tests/fixtures/lists_with_margins.html b/tests/fixtures/lists_with_margins.html
new file mode 100644
index 00000000..f06c3c69
--- /dev/null
+++ b/tests/fixtures/lists_with_margins.html
@@ -0,0 +1,19 @@
+
+ - AAA
+ - BBB
+
+ - CCC
+ - DDD
+
+ - EEE
+ - FFF
+
+ - GGG
+ - HHH
+
+
+
+
+
+
+
diff --git a/tests/fixtures/lists_with_styles.html b/tests/fixtures/lists_with_styles.html
index fdce6dda..72c5c552 100644
--- a/tests/fixtures/lists_with_styles.html
+++ b/tests/fixtures/lists_with_styles.html
@@ -2,12 +2,12 @@
- AAA
- BBB
- - CCC
- - DDD
+
- CCC
+ - DDD
- - EEE
+
- EEE
- - FFF
+ - FFF
diff --git a/tests/fixtures/nested_lists.html b/tests/fixtures/nested_lists.html
index f5ba5329..6d8221f0 100644
--- a/tests/fixtures/nested_lists.html
+++ b/tests/fixtures/nested_lists.html
@@ -3,11 +3,11 @@
- two
- three
- - AAA
- - BBB
- - CCC
+
- AAA
+ - BBB
+ - CCC
- - alpha
+ - alpha
@@ -17,14 +17,14 @@
- xxx
- - yyy
+ - yyy
diff --git a/tests/fixtures/nested_lists_different_num_ids.docx b/tests/fixtures/nested_lists_different_num_ids.docx
new file mode 100644
index 00000000..df1fd978
Binary files /dev/null and b/tests/fixtures/nested_lists_different_num_ids.docx differ
diff --git a/tests/fixtures/nested_lists_different_num_ids.html b/tests/fixtures/nested_lists_different_num_ids.html
new file mode 100644
index 00000000..9f3562a9
--- /dev/null
+++ b/tests/fixtures/nested_lists_different_num_ids.html
@@ -0,0 +1,28 @@
+
+ - one
+ - two
+
+
+
+ - three
+
+
+ - four
+ - five
+
diff --git a/tests/fixtures/paragraph_with_margins.docx b/tests/fixtures/paragraph_with_margins.docx
new file mode 100644
index 00000000..2995cf0c
Binary files /dev/null and b/tests/fixtures/paragraph_with_margins.docx differ
diff --git a/tests/fixtures/paragraph_with_margins.html b/tests/fixtures/paragraph_with_margins.html
new file mode 100644
index 00000000..1b1098c5
--- /dev/null
+++ b/tests/fixtures/paragraph_with_margins.html
@@ -0,0 +1,11 @@
+Heading1
+
+ Heading2
+
+
+
+
+
+ Heading3
+
+
diff --git a/tests/openxml/wordprocessing/test_paragraph_properties.py b/tests/openxml/wordprocessing/test_paragraph_properties.py
index 9617abe4..6388afcd 100644
--- a/tests/openxml/wordprocessing/test_paragraph_properties.py
+++ b/tests/openxml/wordprocessing/test_paragraph_properties.py
@@ -105,3 +105,21 @@ def test_returns_left_minus_hanging_ignoring_first_line(self):
'''
properties = self._load_from_xml(xml)
self.assertEqual(properties.start_margin_position, 100)
+
+ def test_allow_decimal_indentation_for_hanging(self):
+ xml = '''
+
+
+
+ '''
+ properties = self._load_from_xml(xml)
+ self.assertEqual(properties.start_margin_position, 100)
+
+ def test_allow_decimal_indentation_for_first_line(self):
+ xml = '''
+
+
+
+ '''
+ properties = self._load_from_xml(xml)
+ self.assertEqual(properties.start_margin_position, 173)
diff --git a/tests/openxml/wordprocessing/test_run_properties.py b/tests/openxml/wordprocessing/test_run_properties.py
index 3d59fe14..bd1209d7 100644
--- a/tests/openxml/wordprocessing/test_run_properties.py
+++ b/tests/openxml/wordprocessing/test_run_properties.py
@@ -16,6 +16,16 @@ def _load_styles_from_xml(self, xml):
root = parse_xml_from_string(xml)
return RunProperties.load(root)
+ def test_run_properties_with_symbol_font(self):
+ xml = b'''
+
+
+
+ '''
+ properties = self._load_styles_from_xml(xml)
+
+ self.assertTrue(properties.r_fonts.is_symbol())
+
def test_bold_on(self):
xml = b'''