python
diff --git a/‎Doc/lib/lib.tex‎
Lines changed: 1 addition & 0 deletions b/‎Doc/lib/lib.tex‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎Doc/lib/libcodecs.tex‎
Lines changed: 71 additions & 1 deletion b/‎Doc/lib/libcodecs.tex‎
Lines changed: 71 additions & 1 deletion
diff --git a/‎Doc/lib/libstringprep.tex‎
Lines changed: 134 additions & 0 deletions b/‎Doc/lib/libstringprep.tex‎
Lines changed: 134 additions & 0 deletions
diff --git a/‎Doc/whatsnew/whatsnew23.tex‎
Lines changed: 21 additions & 0 deletions b/‎Doc/whatsnew/whatsnew23.tex‎
Lines changed: 21 additions & 0 deletions
@@ -112,6 +112,7 @@ \chapter*{Front Matter\label{front}}
 \input{libtextwrap}
 \input{libcodecs}
 \input{libunicodedata}
+\input{libstringprep}
 
 \input{libmisc}                 % Miscellaneous Services
 \input{libpydoc}
 
@@ -5,7 +5,7 @@ \section{\module{codecs} ---
 \modulesynopsis{Encode and decode data and streams.}
 \moduleauthor{Marc-Andre Lemburg}{[email protected]}
 \sectionauthor{Marc-Andre Lemburg}{[email protected]}
-
+\sectionauthor{Martin v. L\"owis}{[email protected]}
 
 \index{Unicode}
 \index{Codecs}
@@ -809,6 +809,11 @@ \subsection{Standard Encodings}
          {byte string}
          {Convert operand to hexadecimal representation, with two digits per byte}
 
+\lineiv{idna}
+         {}
+         {Unicode string}
+         {Implements \rfc{3490}. \versionadded{2.3}. See also \module{encodings.idna}}
+
 \lineiv{mbcs}
          {dbcs}
          {Unicode string}
@@ -819,6 +824,11 @@ \subsection{Standard Encodings}
          {Unicode string}
          {Encoding of PalmOS 3.5}
 
+\lineiv{punycode}
+         {}
+         {Unicode string}
+         {Implements \rfc{3492}. \versionadded{2.3}}
+
 \lineiv{quopri_codec}
          {quopri, quoted-printable, quotedprintable}
          {byte string}
@@ -865,3 +875,63 @@ \subsection{Standard Encodings}
          {Compress the operand using gzip}
 
 \end{tableiv}
+
+\subsection{\module{encodings.idna} ---
+            Internationalized Domain Names in Applications}
+
+\declaremodule{standard}{encodings.idna}
+\modulesynopsis{Internationalized Domain Names implementation}
+\moduleauthor{Martin v. L\"owis}{[email protected]}
+
+This module implements \rfc{3490} (Internationalized Domain Names in
+Applications) and \rfc{3491} (Nameprep: A Stringprep Profile for
+Internationalized Domain Names (IDN)). It builds upon the
+\code{punycode} encoding and \module{stringprep}. \versionadded{2.3}
+
+These RFCs together define a protocol to support non-ASCII characters
+in domain names. A domain name containing non-ASCII characters (such
+as ``www.Alliancefran\c{c}aise.nu'') is converted into an
+ASCII-compatible encoding (ACE, such as
+``www.xn--alliancefranaise-npb.nu''). The ACE form of the domain name
+is then used in all places where arbitrary characters are not allowed
+by the protocol, such as DNS queries, HTTP \code{Host:} fields, and so
+on. This conversion is carried out in the application, if possible
+invisibly to the user: the application should transparently convert
+Unicode domain labels to IDNA on the wire, and convert ACE labels back
+to Unicode before presenting them to the user.
+
+Python supports this conversion in several ways: The \code{idna} codec
+allows converting between Unicode and the ACE. Furthermore, the
+\module{socket} module transparently converts Unicode host names to
+ACE, so that applications need not be concerned about converting host
+names themselves when they pass them to the socket module. On top of
+that, modules that have host names as function parameters, such as
+\module{httplib} and \module{ftplib}, accept Unicode host names
+(\module{httplib} then also transparently sends an IDNA hostname in
+the \code{Host:} field if it sends that field at all). 
+
+When receiving host names from the wire (such as in reverse name
+lookup), no automatic conversion to Unicode is performed: Applications
+wishing to present such host names to the user should decode them to
+Unicode.
+
+The module \module{encodings.idna} also implements the nameprep
+procedure, which performs certain normalizations on host names, to
+achieve case-insensitivity of international domain names, and to unify
+similar characters. The nameprep functions can be used directly if
+desired.
+
+\begin{funcdesc}{nameprep}{label}
+Return the nameprepped version of \var{label}. The implementation
+currently assumes query strings, so \code{AllowUnassigned} is
+true.
+\end{funcdesc}
+
+\begin{funcdesc}{ToASCII}{label}
+Convert a label to ASCII, as specified in \rfc{3490}.
+\code{UseSTD3ASCIIRules} is assumed to be false.
+\end{funcdesc}
+
+\begin{funcdesc}{ToUnicode}{label}
+Convert a label to Unicode, as specified in \rfc{3490}.
+\end{funcdesc}
@@ -0,0 +1,134 @@
+\section{\module{stringprep} ---
+         Internet String Preparation}
+
+\declaremodule{standard}{stringprep}
+\modulesynopsis{String preparation, as per RFC 3454}
+\moduleauthor{Martin v. L\"owis}{[email protected]}
+\sectionauthor{Martin v. L\"owis}{[email protected]}
+
+When identifying things (such as host names) in the internet, it is
+often necessary to compare such identifications for
+``equality''. Exactly how this comparison is executed may depend on
+the application domain, e.g.\ whether it should be case-insensitive or
+not. It may also be necessary to restrict the possible
+identifications, to allow only identifications consisting of
+``printable'' characters.
+
+\rfc{3454} defines a procedure for ``preparing'' Unicode strings in
+internet protocols. Before passing strings onto the wire, they are
+processed with the preparation procedure, after which they have a
+certain normalized form. The RFC defines a set of tables, which can be
+combined into profiles. Each profile must define which tables it uses,
+and what other optional parts of the \code{stringprep} procedure are
+part of the profile. One example of a \code{stringprep} profile is
+\code{nameprep}, which is used for internationalized domain names.
+
+The module \module{stringprep} only exposes the tables from
+\rfc{3454}. As these tables would be very large if represented as
+dictionaries or lists, the module uses the Unicode character database
+internally. The module source code itself was generated using the
+\code{mkstringprep.py} utility.
+
+As a result, these tables are exposed as functions, not as data
+structures. There are two kinds of tables in the RFC: sets and
+mappings. For a set, \module{stringprep} provides the ``characteristic
+function'', i.e.\ a function that returns true if the parameter is part
+of the set. For mappings, it provides the mapping function: given the
+key, it returns the associated value. Below is a list of all functions
+available in the module.
+
+\begin{funcdesc}{in_table_a1}{code}
+Determine whether \var{code} is in table~A.1 (Unassigned code points
+in Unicode 3.2).
+\end{funcdesc}
+
+\begin{funcdesc}{in_table_b1}{code}
+Determine whether \var{code} is in table~B.1 (Commonly mapped to
+nothing).
+\end{funcdesc}
+
+\begin{funcdesc}{map_table_b2}{code}
+Return the mapped value for \var{code} according to table~B.2
+(Mapping for case-folding used with NFKC).
+\end{funcdesc}
+
+\begin{funcdesc}{map_table_b3}{code}
+Return the mapped value for \var{code} according to table~B.3
+(Mapping for case-folding used with no normalization).
+\end{funcdesc}
+
+\begin{funcdesc}{in_table_c11}{code}
+Determine whether \var{code} is in table~C.1.1
+(ASCII space characters).
+\end{funcdesc}
+
+\begin{funcdesc}{in_table_c12}{code}
+Determine whether \var{code} is in table~C.1.2
+(Non-ASCII space characters).
+\end{funcdesc}
+
+\begin{funcdesc}{in_table_c11_c12}{code}
+Determine whether \var{code} is in table~C.1
+(Space characters, union of C.1.1 and C.1.2).
+\end{funcdesc}
+
+\begin{funcdesc}{in_table_c21}{code}
+Determine whether \var{code} is in table~C.2.1
+(ASCII control characters).
+\end{funcdesc}
+
+\begin{funcdesc}{in_table_c22}{code}
+Determine whether \var{code} is in table~C.2.2
+(Non-ASCII control characters).
+\end{funcdesc}
+
+\begin{funcdesc}{in_table_c21_c22}{code}
+Determine whether \var{code} is in table~C.2
+(Control characters, union of C.2.1 and C.2.2).
+\end{funcdesc}
+
+\begin{funcdesc}{in_table_c3}{code}
+Determine whether \var{code} is in table~C.3
+(Private use).
+\end{funcdesc}
+
+\begin{funcdesc}{in_table_c4}{code}
+Determine whether \var{code} is in table~C.4
+(Non-character code points).
+\end{funcdesc}
+
+\begin{funcdesc}{in_table_c5}{code}
+Determine whether \var{code} is in table~C.5
+(Surrogate codes).
+\end{funcdesc}
+
+\begin{funcdesc}{in_table_c6}{code}
+Determine whether \var{code} is in table~C.6
+(Inappropriate for plain text).
+\end{funcdesc}
+
+\begin{funcdesc}{in_table_c7}{code}
+Determine whether \var{code} is in table~C.7
+(Inappropriate for canonical representation).
+\end{funcdesc}
+
+\begin{funcdesc}{in_table_c8}{code}
+Determine whether \var{code} is in table~C.8
+(Change display properties or are deprecated).
+\end{funcdesc}
+
+\begin{funcdesc}{in_table_c9}{code}
+Determine whether \var{code} is in table~C.9
+(Tagging characters).
+\end{funcdesc}
+
+\begin{funcdesc}{in_table_d1}{code}
+Determine whether \var{code} is in table~D.1
+(Characters with bidirectional property ``R'' or ``AL'').
+\end{funcdesc}
+
+\begin{funcdesc}{in_table_d2}{code}
+Determine whether \var{code} is in table~D.2
+(Characters with bidirectional property ``L'').
+\end{funcdesc}
+
@@ -1791,6 +1791,27 @@ \section{New, Improved, and Deprecated Modules}
 
 Any breakage caused by this change should be reported as a bug.
 
+\item Support for internationalized domain names (RFCs 3454, 3490,
+3491, and 3492) has been added. The ``idna'' encoding can be used
+to convert between a Unicode domain name and the ASCII-compatible
+encoding (ACE).
+
+\begin{verbatim}
+>>> u"www.Alliancefran\xe7aise.nu".encode("idna")
+'www.xn--alliancefranaise-npb.nu'
+\end{verbatim}
+
+In addition, the \module{socket} module has been extended to transparently
+convert Unicode hostnames to the ACE before passing them to the C
+library. In turn, modules that pass hostnames ``through'' (such as
+\module{httplib}, \module{ftplib}) also support Unicode host names
+(\module{httplib} also sends ACE \code{Host:} headers). \module{urllib} supports
+Unicode URLs with non-ASCII host names as long as the \code{path} part
+of the URL is ASCII only.
+
+To implement this change, the module \module{stringprep}, the tool
+\code{mkstringprep} and the \code{punycode} encoding have been added.
+
 \end{itemize}