Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 6ec29e2

Browse files
committed
Issue #8373: The filesystem path of AF_UNIX sockets now uses the filesystem
encoding and the surrogateescape error handler, rather than UTF-8. Patch by David Watson.
1 parent ab0e9f7 commit 6ec29e2

4 files changed

Lines changed: 101 additions & 9 deletions

File tree

Doc/library/socket.rst

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40,9 +40,23 @@ Socket families
4040
Depending on the system and the build options, various socket families
4141
are supported by this module.
4242

43-
Socket addresses are represented as follows:
43+
The address format required by a particular socket object is automatically
44+
selected based on the address family specified when the socket object was
45+
created. Socket addresses are represented as follows:
46+
47+
- The address of an :const:`AF_UNIX` socket bound to a file system node
48+
is represented as a string, using the file system encoding and the
49+
``'surrogateescape'`` error handler (see :pep:`383`). An address in
50+
Linux's abstract namespace is returned as a :class:`bytes` object with
51+
an initial null byte; note that sockets in this namespace can
52+
communicate with normal file system sockets, so programs intended to
53+
run on Linux may need to deal with both types of address. A string or
54+
:class:`bytes` object can be used for either type of address when
55+
passing it as an argument.
4456

45-
- A single string is used for the :const:`AF_UNIX` address family.
57+
.. versionchanged:: 3.3
58+
Previously, :const:`AF_UNIX` socket paths were assumed to use UTF-8
59+
encoding.
4660

4761
- A pair ``(host, port)`` is used for the :const:`AF_INET` address family,
4862
where *host* is a string representing either a hostname in Internet domain

Lib/test/test_socket.py

Lines changed: 62 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1538,7 +1538,6 @@ def testRecvFromNegative(self):
15381538
def _testRecvFromNegative(self):
15391539
self.cli.sendto(MSG, 0, (HOST, self.port))
15401540

1541-
15421541
# Tests for the sendmsg()/recvmsg() interface. Where possible, the
15431542
# same test code is used with different families and types of socket
15441543
# (e.g. stream, datagram), and tests using recvmsg() are repeated
@@ -4241,6 +4240,66 @@ def testNameOverflow(self):
42414240
with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as s:
42424241
self.assertRaises(socket.error, s.bind, address)
42434242

4243+
def testStrName(self):
4244+
# Check that an abstract name can be passed as a string.
4245+
s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
4246+
try:
4247+
s.bind("\x00python\x00test\x00")
4248+
self.assertEqual(s.getsockname(), b"\x00python\x00test\x00")
4249+
finally:
4250+
s.close()
4251+
4252+
class TestUnixDomain(unittest.TestCase):
4253+
4254+
def setUp(self):
4255+
self.sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
4256+
4257+
def tearDown(self):
4258+
self.sock.close()
4259+
4260+
def encoded(self, path):
4261+
# Return the given path encoded in the file system encoding,
4262+
# or skip the test if this is not possible.
4263+
try:
4264+
return os.fsencode(path)
4265+
except UnicodeEncodeError:
4266+
self.skipTest(
4267+
"Pathname {0!a} cannot be represented in file "
4268+
"system encoding {1!r}".format(
4269+
path, sys.getfilesystemencoding()))
4270+
4271+
def testStrAddr(self):
4272+
# Test binding to and retrieving a normal string pathname.
4273+
path = os.path.abspath(support.TESTFN)
4274+
self.sock.bind(path)
4275+
self.addCleanup(support.unlink, path)
4276+
self.assertEqual(self.sock.getsockname(), path)
4277+
4278+
def testBytesAddr(self):
4279+
# Test binding to a bytes pathname.
4280+
path = os.path.abspath(support.TESTFN)
4281+
self.sock.bind(self.encoded(path))
4282+
self.addCleanup(support.unlink, path)
4283+
self.assertEqual(self.sock.getsockname(), path)
4284+
4285+
def testSurrogateescapeBind(self):
4286+
# Test binding to a valid non-ASCII pathname, with the
4287+
# non-ASCII bytes supplied using surrogateescape encoding.
4288+
path = os.path.abspath(support.TESTFN_UNICODE)
4289+
b = self.encoded(path)
4290+
self.sock.bind(b.decode("ascii", "surrogateescape"))
4291+
self.addCleanup(support.unlink, path)
4292+
self.assertEqual(self.sock.getsockname(), path)
4293+
4294+
def testUnencodableAddr(self):
4295+
# Test binding to a pathname that cannot be encoded in the
4296+
# file system encoding.
4297+
if support.TESTFN_UNENCODABLE is None:
4298+
self.skipTest("No unencodable filename available")
4299+
path = os.path.abspath(support.TESTFN_UNENCODABLE)
4300+
self.sock.bind(path)
4301+
self.addCleanup(support.unlink, path)
4302+
self.assertEqual(self.sock.getsockname(), path)
42444303

42454304
@unittest.skipUnless(thread, 'Threading required for this test.')
42464305
class BufferIOTest(SocketConnectedTest):
@@ -4517,6 +4576,8 @@ def test_main():
45174576
])
45184577
if hasattr(socket, "socketpair"):
45194578
tests.append(BasicSocketPairTest)
4579+
if hasattr(socket, "AF_UNIX"):
4580+
tests.append(TestUnixDomain)
45204581
if sys.platform == 'linux':
45214582
tests.append(TestLinuxAbstractNamespace)
45224583
if isTipcAvailable():

Misc/NEWS

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -419,6 +419,10 @@ Core and Builtins
419419
Library
420420
-------
421421

422+
- Issue #8373: The filesystem path of AF_UNIX sockets now uses the filesystem
423+
encoding and the surrogateescape error handler, rather than UTF-8. Patch
424+
by David Watson.
425+
422426
- Issue #10350: Read and save errno before calling a function which might
423427
overwrite it. Original patch by Hallvard B Furuseth.
424428

Modules/socketmodule.c

Lines changed: 19 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1073,7 +1073,7 @@ makesockaddr(SOCKET_T sockfd, struct sockaddr *addr, size_t addrlen, int proto)
10731073
#endif /* linux */
10741074
{
10751075
/* regular NULL-terminated string */
1076-
return PyUnicode_FromString(a->sun_path);
1076+
return PyUnicode_DecodeFSDefault(a->sun_path);
10771077
}
10781078
}
10791079
#endif /* AF_UNIX */
@@ -1269,8 +1269,18 @@ getsockaddrarg(PySocketSockObject *s, PyObject *args,
12691269
struct sockaddr_un* addr;
12701270
char *path;
12711271
int len;
1272-
if (!PyArg_Parse(args, "s#", &path, &len))
1273-
return 0;
1272+
int retval = 0;
1273+
1274+
/* PEP 383. Not using PyUnicode_FSConverter since we need to
1275+
allow embedded nulls on Linux. */
1276+
if (PyUnicode_Check(args)) {
1277+
if ((args = PyUnicode_EncodeFSDefault(args)) == NULL)
1278+
return 0;
1279+
}
1280+
else
1281+
Py_INCREF(args);
1282+
if (!PyArg_Parse(args, "y#", &path, &len))
1283+
goto unix_out;
12741284

12751285
addr = (struct sockaddr_un*)addr_ret;
12761286
#ifdef linux
@@ -1279,7 +1289,7 @@ getsockaddrarg(PySocketSockObject *s, PyObject *args,
12791289
if (len > sizeof addr->sun_path) {
12801290
PyErr_SetString(PyExc_OSError,
12811291
"AF_UNIX path too long");
1282-
return 0;
1292+
goto unix_out;
12831293
}
12841294
}
12851295
else
@@ -1289,7 +1299,7 @@ getsockaddrarg(PySocketSockObject *s, PyObject *args,
12891299
if (len >= sizeof addr->sun_path) {
12901300
PyErr_SetString(PyExc_OSError,
12911301
"AF_UNIX path too long");
1292-
return 0;
1302+
goto unix_out;
12931303
}
12941304
addr->sun_path[len] = 0;
12951305
}
@@ -1300,7 +1310,10 @@ getsockaddrarg(PySocketSockObject *s, PyObject *args,
13001310
#else
13011311
*len_ret = len + offsetof(struct sockaddr_un, sun_path);
13021312
#endif
1303-
return 1;
1313+
retval = 1;
1314+
unix_out:
1315+
Py_DECREF(args);
1316+
return retval;
13041317
}
13051318
#endif /* AF_UNIX */
13061319

0 commit comments

Comments
 (0)