Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Add collation option and set_character_set() to Connection #1119

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 40 additions & 3 deletions pymysql/connections.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ class Connection:
(default: None - no timeout)
:param write_timeout: The timeout for writing to the connection in seconds.
(default: None - no timeout)
:param charset: Charset to use.
:param str charset: Charset to use.
:param str collation: Collation name to use.
:param sql_mode: Default SQL_MODE to use.
:param read_default_file:
Specifies my.cnf file to read these parameters from under the [client] section.
Expand Down Expand Up @@ -174,6 +175,7 @@ def __init__(
unix_socket=None,
port=0,
charset="",
collation=None,
sql_mode=None,
read_default_file=None,
conv=None,
Expand Down Expand Up @@ -308,6 +310,7 @@ def _config(key, arg):
self._write_timeout = write_timeout

self.charset = charset or DEFAULT_CHARSET
self.collation = collation
self.use_unicode = use_unicode

self.encoding = charset_by_name(self.charset).encoding
Expand Down Expand Up @@ -593,13 +596,32 @@ def ping(self, reconnect=True):
raise

def set_charset(self, charset):
    """Deprecated alias kept for compatibility; use set_character_set().

    :param charset: Charset name, forwarded unchanged to
        set_character_set().
    """
    # Older PyMySQL releases exposed this operation as set_charset(),
    # whereas MySQLdb spells it set_character_set(). The old name is kept
    # so existing callers continue to work; the actual work is done by
    # set_character_set().
    self.set_character_set(charset)

def set_character_set(self, charset, collation=None):
    """
    Set charset (and collation) for this connection.

    Send a single "SET NAMES charset [COLLATE collation]" query, read its
    response packet, then update Connection.charset, Connection.encoding
    and Connection.collation.

    :param str charset: Charset name. Its encoding is looked up with
        charset_by_name() before any query is sent.
    :param str collation: Optional collation name. When falsy, the
        COLLATE clause is omitted from the query.
    """
    # Make sure charset is supported: the encoding lookup happens before
    # anything is sent to the server.
    encoding = charset_by_name(charset).encoding

    # NOTE: charset and collation are interpolated into the statement
    # verbatim (no escaping), so they must not come from untrusted input.
    if collation:
        query = f"SET NAMES {charset} COLLATE {collation}"
    else:
        query = f"SET NAMES {charset}"

    # Send exactly one command and read exactly one response packet.
    # Issuing a second SET NAMES without reading its reply would leave an
    # unread packet on the connection.
    self._execute_command(COMMAND.COM_QUERY, query)
    self._read_packet()

    # Only record the new settings after the command round trip above.
    self.charset = charset
    self.encoding = encoding
    self.collation = collation

def connect(self, sock=None):
self._closed = False
Expand Down Expand Up @@ -641,15 +663,30 @@ def connect(self, sock=None):
self._get_server_information()
self._request_authentication()

# Send "SET NAMES" query on init for:
# - Ensure charset (and collation) is set to the server.
# - collation_id in handshake packet may be ignored.
# - If collation is not specified, we don't know what is server's
# default collation for the charset. For example, default collation
# of utf8mb4 is:
# - MySQL 5.7, MariaDB 10.x: utf8mb4_general_ci
# - MySQL 8.0: utf8mb4_0900_ai_ci
#
# Reference:
# - https://github.com/PyMySQL/PyMySQL/issues/1092
# - https://github.com/wagtail/wagtail/issues/9477
# - https://zenn.dev/methane/articles/2023-mysql-collation (Japanese)
self.set_character_set(self.charset, self.collation)

if self.sql_mode is not None:
c = self.cursor()
c.execute("SET sql_mode=%s", (self.sql_mode,))
c.close()

if self.init_command is not None:
c = self.cursor()
c.execute(self.init_command)
c.close()
self.commit()

if self.autocommit_mode is not None:
self.autocommit(self.autocommit_mode)
Expand Down
14 changes: 14 additions & 0 deletions pymysql/tests/test_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,20 @@ def test_utf8mb4(self):
arg["charset"] = "utf8mb4"
pymysql.connect(**arg)

def test_set_character_set(self):
    """Check set_character_set() against server variables and con.encoding."""
    conn = self.connect()
    cursor = conn.cursor()

    # Charset only: no collation argument is passed.
    conn.set_character_set("latin1")
    cursor.execute("SELECT @@character_set_connection")
    row = cursor.fetchone()
    self.assertEqual(row, ("latin1",))
    self.assertEqual(conn.encoding, "cp1252")

    # Charset together with an explicit collation.
    conn.set_character_set("utf8mb4", "utf8mb4_general_ci")
    cursor.execute("SELECT @@character_set_connection, @@collation_connection")
    row = cursor.fetchone()
    self.assertEqual(row, ("utf8mb4", "utf8mb4_general_ci"))
    self.assertEqual(conn.encoding, "utf8")

def test_largedata(self):
"""Large query and response (>=16MB)"""
cur = self.connect().cursor()
Expand Down