@@ -410,29 +410,48 @@ \subsection{String literals\label{strings}}
410410\index {Standard C}
411411\index {C}
412412
413- \begin {tableii }{l|l}{code}{Escape Sequence}{Meaning}
414- \lineii {\e\var {newline}} {Ignored}
415- \lineii {\e\e } {Backslash (\code {\e })}
416- \lineii {\e '} {Single quote (\code {'})}
417- \lineii {\e "} {Double quote (\code {"})}
418- \lineii {\e a} {\ASCII {} Bell (BEL)}
419- \lineii {\e b} {\ASCII {} Backspace (BS)}
420- \lineii {\e f} {\ASCII {} Formfeed (FF)}
421- \lineii {\e n} {\ASCII {} Linefeed (LF)}
422- \lineii {\e N\{ \var {name}\} }
423- {Character named \var {name} in the Unicode database (Unicode only)}
424- \lineii {\e r} {\ASCII {} Carriage Return (CR)}
425- \lineii {\e t} {\ASCII {} Horizontal Tab (TAB)}
426- \lineii {\e u\var {xxxx}} {Character with 16-bit hex value \var {xxxx} (Unicode only)}
427- \lineii {\e U\var {xxxxxxxx}}{Character with 32-bit hex value \var {xxxxxxxx} (Unicode only)}
428- \lineii {\e v} {\ASCII {} Vertical Tab (VT)}
429- \lineii {\e\var {ooo}} {\ASCII {} character with octal value \var {ooo}}
430- \lineii {\e x\var {hh}} {\ASCII {} character with hex value \var {hh}}
431- \end {tableii }
413+ \begin {tableiii }{l|l|c}{code}{Escape Sequence}{Meaning}{Notes}
414+ \lineiii {\e\var {newline}} {Ignored}{}
415+ \lineiii {\e\e } {Backslash (\code {\e })}{}
416+ \lineiii {\e '} {Single quote (\code {'})}{}
417+ \lineiii {\e "} {Double quote (\code {"})}{}
418+ \lineiii {\e a} {\ASCII {} Bell (BEL)}{}
419+ \lineiii {\e b} {\ASCII {} Backspace (BS)}{}
420+ \lineiii {\e f} {\ASCII {} Formfeed (FF)}{}
421+ \lineiii {\e n} {\ASCII {} Linefeed (LF)}{}
422+ \lineiii {\e N\{ \var {name}\} }
423+ {Character named \var {name} in the Unicode database (Unicode only)}{}
424+ \lineiii {\e r} {\ASCII {} Carriage Return (CR)}{}
425+ \lineiii {\e t} {\ASCII {} Horizontal Tab (TAB)}{}
426+ \lineiii {\e u\var {xxxx}}
427+ {Character with 16-bit hex value \var {xxxx} (Unicode only)}{(1)}
428+ \lineiii {\e U\var {xxxxxxxx}}
429+ {Character with 32-bit hex value \var {xxxxxxxx} (Unicode only)}{(2)}
430+ \lineiii {\e v} {\ASCII {} Vertical Tab (VT)}{}
431+ \lineiii {\e\var {ooo}} {\ASCII {} character with octal value \var {ooo}}{(3)}
432+ \lineiii {\e x\var {hh}} {\ASCII {} character with hex value \var {hh}}{(4)}
433+ \end {tableiii }
432434\index {ASCII@\ASCII }
433435
434- As in Standard C, up to three octal digits are accepted. However,
435- exactly two hex digits are taken in hex escapes.
436+ \noindent
437+ Notes:
438+
439+ \begin {itemize }
440+ \item [(1)]
441+ Individual code units which form parts of a surrogate pair can be
442+ encoded using this escape sequence.
443+ \item [(2)]
444+ Any Unicode character can be encoded this way, but characters
445+ outside the Basic Multilingual Plane (BMP) will be encoded using a
446+ surrogate pair if Python is compiled to use 16-bit code units (the
447+ default). Individual code units which form parts of a surrogate
448+ pair can be encoded using this escape sequence.
449+ \item [(3)]
450+ As in Standard C, up to three octal digits are accepted.
451+ \item [(4)]
452+ Unlike in Standard C, exactly two hex digits are required.
453+ \end {itemize }
454+
436455
437456Unlike Standard \index {unrecognized escape sequence}C,
438457all unrecognized escape sequences are left in the string unchanged,
@@ -460,12 +479,12 @@ \subsection{String literals\label{strings}}
460479When an \character {r} or \character {R} prefix is used in conjunction
461480with a \character {u} or \character {U} prefix, then the \code {\e uXXXX}
462481escape sequence is processed while \emph {all other backslashes are
463- left in the string }. For example, the string literal \code {ur" \e
464- u0062\e n"} consists of three Unicode characters: `LATIN SMALL LETTER
465- B', `REVERSE SOLIDUS', and `LATIN SMALL LETTER N'. Backslashes can be
466- escaped with a preceding backslash; however, both remain in the
467- string. As a result, \code {\e uXXXX} escape sequences are only
468- recognized when there are an odd number of backslashes.
482+ left in the string }. For example, the string literal
483+ \code{ur"\e{}u0062\e n"} consists of three Unicode characters: `LATIN
484+ SMALL LETTER B', `REVERSE SOLIDUS', and `LATIN SMALL LETTER N'.
485+ Backslashes can be escaped with a preceding backslash; however, both
486+ remain in the string. As a result, \code {\e uXXXX} escape sequences
487+ are only recognized when there are an odd number of backslashes.
469488
470489\subsection {String literal concatenation\label {string-catenation } }
471490
0 commit comments