Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 5517254

Browse files
committed
Handle parenthesis around links
1 parent 10bd33c commit 5517254

File tree

5 files changed

+67
-60
lines changed

5 files changed

+67
-60
lines changed

lib/markdown2.py

Lines changed: 57 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1006,42 +1006,15 @@ def _sanitize_html(self, s):
10061006
raise MarkdownError("invalid value for 'safe_mode': %r (must be "
10071007
"'escape' or 'replace')" % self.safe_mode)
10081008

1009-
"""
1010-
The expression [^ \t'"]* is used instead of .* because of special cases
1011-
for links. Specifically: inline links, quotes in links, and odd anchors.
1012-
"""
1013-
_tail_of_inline_link_re_str = r'''
1014-
\( # literal paren
1015-
[ \t]*
1016-
(?P<url> # \1
1017-
<[^{char_blacklist}]*>
1018-
|
1019-
[^{char_blacklist}]*
1020-
)
1021-
{title_separator}
1022-
( # \2
1023-
{title_prefix}
1024-
(['"]) # quote char = \3
1009+
_inline_link_title = re.compile(r'''
1010+
( # \1
1011+
[ \t]+
1012+
(['"]) # quote char = \2
10251013
(?P<title>.*?)
1026-
\3 # matching quote
1014+
\2
10271015
)? # title is optional
1028-
\)
1029-
''';
1030-
_tail_of_inline_link_re = re.compile(
1031-
# Match tail of: [text](/url/) or [text](/url/ "title")
1032-
_tail_of_inline_link_re_str.format(
1033-
char_blacklist=r''' \t'"''',
1034-
title_separator=r'''[ \t]*''',
1035-
title_prefix="",
1036-
), re.X)
1037-
_tail_of_inline_link_wth_whtspc_re = re.compile(
1038-
# Special case of the above where url contains whitespace
1039-
# but no closing parensthesis
1040-
_tail_of_inline_link_re_str.format(
1041-
char_blacklist=r'''\)'"''',
1042-
title_separator="",
1043-
title_prefix=r'''[ \t]+''',
1044-
), re.X)
1016+
\)$
1017+
''', re.X | re.S)
10451018
_tail_of_reference_link_re = re.compile(r'''
10461019
# Match tail of: [text][id]
10471020
[ ]? # one optional space
@@ -1051,6 +1024,52 @@ def _sanitize_html(self, s):
10511024
\]
10521025
''', re.X | re.S)
10531026

1027+
_whitespace = re.compile(r'\s*')
1028+
1029+
_strip_anglebrackets = re.compile(r'<(.*)>.*')
1030+
1031+
def _find_non_whitespace(self, text, start):
1032+
"""Returns the index of the first non-whitespace character in text
1033+
after (and including) start
1034+
"""
1035+
match = self._whitespace.match(text, start)
1036+
return match.end()
1037+
1038+
def _find_balanced(self, text, start, open_c, close_c):
1039+
"""Returns the index where the open_c and close_c characters balance
1040+
out - the same number of open_c and close_c are encountered - or the
1041+
end of string if it's reached before the balance point is found.
1042+
"""
1043+
i = start
1044+
l = len(text)
1045+
count = 1
1046+
while count > 0 and i < l:
1047+
if text[i] == open_c:
1048+
count += 1
1049+
elif text[i] == close_c:
1050+
count -= 1
1051+
i += 1
1052+
return i
1053+
1054+
def _extract_url_and_title(self, text, start):
    """Extract the url and (optional) title from the tail of a link.

    `text[start]` must be the opening parenthesis of the link tail.

    Returns a `(url, title, end_idx)` tuple. `title` is None when the
    link has no quoted title, and `end_idx` is the index produced by the
    parenthesis-balancing scan (the caller resumes from `text[end_idx:]`).
    Returns `(None, None, None)` when only whitespace follows the opening
    parenthesis or when the tail does not end in a closing parenthesis.
    """
    # text[start] equals the opening parenthesis
    idx = self._find_non_whitespace(text, start+1)
    if idx == len(text):
        return None, None, None
    end_idx = idx
    has_anglebrackets = text[idx] == "<"
    if has_anglebrackets:
        # Skip over the whole <...> span first so that parentheses inside
        # the angle brackets do not take part in the balancing below.
        end_idx = self._find_balanced(text, end_idx+1, "<", ">")
    end_idx = self._find_balanced(text, end_idx, "(", ")")
    # Restrict the search to the link tail; the pattern is anchored on the
    # closing parenthesis at the end of that window.
    match = self._inline_link_title.search(text, idx, end_idx)
    if not match:
        return None, None, None
    # Everything between the first non-blank character and the start of
    # the (optional) title / closing parenthesis is the url.
    url, title = text[idx:match.start()], match.group("title")
    if has_anglebrackets:
        url = self._strip_anglebrackets.sub(r'\1', url)
    return url, title, end_idx
1072+
10541073
def _do_links(self, text):
10551074
"""Turn Markdown link shortcuts into XHTML <a> and <img> tags.
10561075
@@ -1133,23 +1152,13 @@ def _do_links(self, text):
11331152

11341153
# Inline anchor or img?
11351154
if text[p] == '(': # attempt at perf improvement
1136-
m1 = self._tail_of_inline_link_re.match(text, p)
1137-
m2 = self._tail_of_inline_link_wth_whtspc_re.match(text, p)
1138-
if m1 and m2:
1139-
match = m1 if m1.end() >= m2.end() else m2
1140-
elif m1:
1141-
match = m1
1142-
else:
1143-
match = m2
1144-
if match:
1155+
url, title, url_end_idx = self._extract_url_and_title(text, p)
1156+
if url is not None:
11451157
# Handle an inline anchor or img.
11461158
is_img = start_idx > 0 and text[start_idx-1] == "!"
11471159
if is_img:
11481160
start_idx -= 1
11491161

1150-
url, title = match.group("url"), match.group("title")
1151-
if url and url[0] == '<':
1152-
url = url[1:-1] # '<url>' -> 'url'
11531162
# We've got to encode these to avoid conflicting
11541163
# with italics/bold.
11551164
url = url.replace('*', self._escape_table['*']) \
@@ -1170,7 +1179,7 @@ def _do_links(self, text):
11701179
if "smarty-pants" in self.extras:
11711180
result = result.replace('"', self._escape_table['"'])
11721181
curr_pos = start_idx + len(result)
1173-
text = text[:start_idx] + result + text[match.end():]
1182+
text = text[:start_idx] + result + text[url_end_idx:]
11741183
elif start_idx >= anchor_allowed_pos:
11751184
result_head = '<a href="%s"%s>' % (url, title_str)
11761185
result = '%s%s</a>' % (result_head, link_text)
@@ -1180,7 +1189,7 @@ def _do_links(self, text):
11801189
# anchor_allowed_pos on.
11811190
curr_pos = start_idx + len(result_head)
11821191
anchor_allowed_pos = start_idx + len(result)
1183-
text = text[:start_idx] + result + text[match.end():]
1192+
text = text[:start_idx] + result + text[url_end_idx:]
11841193
else:
11851194
# Anchor not allowed here.
11861195
curr_pos = start_idx + 1
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
<p>(<a href="/url/">link text</a>)</p>
2+
3+
<p>(<a href="/url/" title="title">link text</a>)</p>
4+
5+
<p>(<a href="/url/" title="title">link text</a>)</p>
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
([link text](/url/))
2+
3+
([link text](/url/ "title"))
4+
5+
([link text](/url/ 'title'))

test/markdowntest-cases/Parens in urls.html

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,3 @@
99
<p><a href="/ur(unblancedr/" title="title">link text</a></p>
1010

1111
<p><a href="/ur(unblancedr/" title="title">link text</a></p>
12-
13-
<p><a href="/unbalancedl)/">link text</a></p>
14-
15-
<p><a href="/unbalancedl)/" title="title">link text</a></p>
16-
17-
<p><a href="/unbalancedl)/" title="title">link text</a></p>

test/markdowntest-cases/Parens in urls.text

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,3 @@
99
[link text](/ur(unblancedr/ "title")
1010

1111
[link text](/ur(unblancedr/ 'title')
12-
13-
[link text](/unbalancedl)/)
14-
15-
[link text](/unbalancedl)/ "title")
16-
17-
[link text](/unbalancedl)/ 'title')

0 commit comments

Comments
 (0)