python-telegram-bot · Bibo-Joshi · Jul 21, 2024 · Jul 13, 2024 · Jul 14, 2024
diff --git a/AUTHORS.rst b/AUTHORS.rst
@@ -96,6 +96,7 @@ The following wonderful people contributed directly or indirectly to this projec
 - `Oleg Sushchenko <https://github.com/feuillemorte>`_
 - `Or Bin <https://github.com/OrBin>`_
 - `overquota <https://github.com/overquota>`_
+- `Pablo Martinez <https://github.com/elpekenin>`_
 - `Paradox <https://github.com/paradox70>`_
 - `Patrick Hofmann <https://github.com/PH89>`_
 - `Paul Larsen <https://github.com/PaulSonOfLars>`_

diff --git a/telegram/_files/inputfile.py b/telegram/_files/inputfile.py
@@ -23,6 +23,7 @@
 from uuid import uuid4
 
 from telegram._utils.files import load_file
+from telegram._utils.strings import TextEncoding
 from telegram._utils.types import FieldTuple
 
 _DEFAULT_MIME_TYPE = "application/octet-stream"
@@ -74,7 +75,7 @@ def __init__(
         if isinstance(obj, bytes):
             self.input_file_content: bytes = obj
         elif isinstance(obj, str):
-            self.input_file_content = obj.encode("utf-8")
+            self.input_file_content = obj.encode(TextEncoding.UTF_8)
         else:
             reported_filename, self.input_file_content = load_file(obj)
             filename = filename or reported_filename

diff --git a/telegram/_games/game.py b/telegram/_games/game.py
@@ -24,6 +24,7 @@
 from telegram._messageentity import MessageEntity
 from telegram._telegramobject import TelegramObject
 from telegram._utils.argumentparsing import parse_sequence_arg
+from telegram._utils.strings import TextEncoding
 from telegram._utils.types import JSONDict
 
 if TYPE_CHECKING:
@@ -157,10 +158,10 @@ def parse_text_entity(self, entity: MessageEntity) -> str:
         if not self.text:
             raise RuntimeError("This Game has no 'text'.")
 
-        entity_text = self.text.encode("utf-16-le")
+        entity_text = self.text.encode(TextEncoding.UTF_16_LE)
         entity_text = entity_text[entity.offset * 2 : (entity.offset + entity.length) * 2]
 
-        return entity_text.decode("utf-16-le")
+        return entity_text.decode(TextEncoding.UTF_16_LE)
 
     def parse_text_entities(self, types: Optional[List[str]] = None) -> Dict[MessageEntity, str]:
         """

diff --git a/telegram/_message.py b/telegram/_message.py
@@ -68,6 +68,7 @@
 from telegram._utils.datetime import extract_tzinfo_from_defaults, from_timestamp
 from telegram._utils.defaultvalue import DEFAULT_NONE, DefaultValue
 from telegram._utils.entities import parse_message_entities, parse_message_entity
+from telegram._utils.strings import TextEncoding
 from telegram._utils.types import (
     CorrectOptionID,
     FileInput,
@@ -1516,8 +1517,8 @@ def compute_quote_position_and_entities(
             raise RuntimeError("This message has neither text nor caption.")
 
         # Telegram wants the position in UTF-16 code units, so we have to calculate in that space
-        utf16_text = text.encode("utf-16-le")
-        utf16_quote = quote.encode("utf-16-le")
+        utf16_text = text.encode(TextEncoding.UTF_16_LE)
+        utf16_quote = quote.encode(TextEncoding.UTF_16_LE)
         effective_index = index or 0
 
         matches = list(re.finditer(re.escape(utf16_quote), utf16_text))
@@ -4479,7 +4480,7 @@ def _parse_html(
         if message_text is None:
             return None
 
-        utf_16_text = message_text.encode("utf-16-le")
+        utf_16_text = message_text.encode(TextEncoding.UTF_16_LE)
         html_text = ""
         last_offset = 0
 
@@ -4543,15 +4544,17 @@ def _parse_html(
             # text is part of the parent entity
             html_text += (
                 escape(
-                    utf_16_text[last_offset * 2 : (entity.offset - offset) * 2].decode("utf-16-le")
+                    utf_16_text[last_offset * 2 : (entity.offset - offset) * 2].decode(
+                        TextEncoding.UTF_16_LE
+                    )
                 )
                 + insert
             )
 
             last_offset = entity.offset - offset + entity.length
 
         # see comment above
-        html_text += escape(utf_16_text[last_offset * 2 :].decode("utf-16-le"))
+        html_text += escape(utf_16_text[last_offset * 2 :].decode(TextEncoding.UTF_16_LE))
 
         return html_text
 
@@ -4680,7 +4683,7 @@ def _parse_markdown(
         if message_text is None:
             return None
 
-        utf_16_text = message_text.encode("utf-16-le")
+        utf_16_text = message_text.encode(TextEncoding.UTF_16_LE)
         markdown_text = ""
         last_offset = 0
 
@@ -4773,7 +4776,7 @@ def _parse_markdown(
             markdown_text += (
                 escape_markdown(
                     utf_16_text[last_offset * 2 : (entity.offset - offset) * 2].decode(
-                        "utf-16-le"
+                        TextEncoding.UTF_16_LE
                     ),
                     version=version,
                 )
@@ -4784,7 +4787,7 @@ def _parse_markdown(
 
         # see comment above
         markdown_text += escape_markdown(
-            utf_16_text[last_offset * 2 :].decode("utf-16-le"),
+            utf_16_text[last_offset * 2 :].decode(TextEncoding.UTF_16_LE),
             version=version,
         )
 

diff --git a/telegram/_messageentity.py b/telegram/_messageentity.py
@@ -26,6 +26,7 @@
 from telegram._telegramobject import TelegramObject
 from telegram._user import User
 from telegram._utils import enum
+from telegram._utils.strings import TextEncoding
 from telegram._utils.types import JSONDict
 
 if TYPE_CHECKING:
@@ -203,7 +204,7 @@ def adjust_message_entities_to_utf_16(
         for i, position in enumerate(positions):
             last_position = positions[i - 1] if i > 0 else 0
             text_slice = text[last_position:position]
-            accumulated_length += len(text_slice.encode("utf-16-le")) // 2
+            accumulated_length += len(text_slice.encode(TextEncoding.UTF_16_LE)) // 2
             position_translation[position] = accumulated_length
         # get the final output entites
         out = []

diff --git a/telegram/_passport/credentials.py b/telegram/_passport/credentials.py
@@ -39,6 +39,7 @@
 
 from telegram._telegramobject import TelegramObject
 from telegram._utils.argumentparsing import parse_sequence_arg
+from telegram._utils.strings import TextEncoding
 from telegram._utils.types import JSONDict
 from telegram.error import PassportDecryptionError
 
@@ -98,7 +99,7 @@ def decrypt(secret, hash, data):
 @no_type_check
 def decrypt_json(secret, hash, data):
     """Decrypts data using secret and hash and then decodes utf-8 string and loads json"""
-    return json.loads(decrypt(secret, hash, data).decode("utf-8"))
+    return json.loads(decrypt(secret, hash, data).decode(TextEncoding.UTF_8))
 
 
 class EncryptedCredentials(TelegramObject):

diff --git a/telegram/_utils/entities.py b/telegram/_utils/entities.py
@@ -26,6 +26,7 @@
 from typing import Dict, Optional, Sequence
 
 from telegram._messageentity import MessageEntity
+from telegram._utils.strings import TextEncoding
 
 
 def parse_message_entity(text: str, entity: MessageEntity) -> str:
@@ -38,10 +39,10 @@ def parse_message_entity(text: str, entity: MessageEntity) -> str:
     Returns:
         :obj:`str`: The text of the given entity.
     """
-    entity_text = text.encode("utf-16-le")
+    entity_text = text.encode(TextEncoding.UTF_16_LE)
     entity_text = entity_text[entity.offset * 2 : (entity.offset + entity.length) * 2]
 
-    return entity_text.decode("utf-16-le")
+    return entity_text.decode(TextEncoding.UTF_16_LE)
 
 
 def parse_message_entities(

diff --git a/telegram/_utils/strings.py b/telegram/_utils/strings.py
@@ -24,6 +24,23 @@
     the changelog.
 """
 
+from telegram._utils.enum import StringEnum
+
+# TODO: Remove this when https://github.com/PyCQA/pylint/issues/6887 is resolved.
+# pylint: disable=invalid-enum-extension,invalid-slots
+
+
+class TextEncoding(StringEnum):
+    """This enum contains the text encoding schemes that are passed to ``encode``/``decode``.
+
+    .. versionadded:: NEXT.VERSION
+    """
+
+    __slots__ = ()
+
+    UTF_8 = "utf-8"  # used for JSON payloads and file contents
+    UTF_16_LE = "utf-16-le"  # used for entity offsets, which Telegram counts in UTF-16 code units
+
 
 def to_camel_case(snake_str: str) -> str:
     """Converts a snake_case string to camelCase.

diff --git a/telegram/request/_baserequest.py b/telegram/request/_baserequest.py
@@ -26,6 +26,7 @@
 from telegram._utils.defaultvalue import DEFAULT_NONE as _DEFAULT_NONE
 from telegram._utils.defaultvalue import DefaultValue
 from telegram._utils.logging import get_logger
+from telegram._utils.strings import TextEncoding
 from telegram._utils.types import JSONDict, ODVInput
 from telegram._utils.warnings import warn
 from telegram._version import __version__ as ptb_ver
@@ -403,7 +404,7 @@ def parse_json_payload(payload: bytes) -> JSONDict:
         Raises:
             TelegramError: If loading the JSON data failed
         """
-        decoded_s = payload.decode("utf-8", "replace")
+        decoded_s = payload.decode(TextEncoding.UTF_8, "replace")
         try:
             return json.loads(decoded_s)
         except ValueError as exc:

diff --git a/telegram/request/_requestdata.py b/telegram/request/_requestdata.py
@@ -21,6 +21,7 @@
 from typing import Any, Dict, List, Optional, Union, final
 from urllib.parse import urlencode
 
+from telegram._utils.strings import TextEncoding
 from telegram._utils.types import UploadFileDict
 from telegram.request._requestparameter import RequestParameter
 
@@ -109,7 +110,7 @@ def json_payload(self) -> bytes:
             To use a custom library for JSON encoding, you can directly encode the keys of
             :attr:`parameters` - note that string valued keys should not be JSON encoded.
         """
-        return json.dumps(self.json_parameters).encode("utf-8")
+        return json.dumps(self.json_parameters).encode(TextEncoding.UTF_8)
 
     @property
     def multipart_data(self) -> UploadFileDict:

diff --git a/tests/_files/test_inputfile.py b/tests/_files/test_inputfile.py
@@ -24,6 +24,7 @@
 import pytest
 
 from telegram import InputFile
+from telegram._utils.strings import TextEncoding
 from tests.auxil.files import data_file
 from tests.auxil.slots import mro_slots
 
@@ -150,17 +151,17 @@ async def test_send_bytes(self, bot, chat_id):
         await (await message.document.get_file()).download_to_memory(out=out)
         out.seek(0)
 
-        assert out.read().decode("utf-8") == "PTB Rocks! ⅞"
+        assert out.read().decode(TextEncoding.UTF_8) == "PTB Rocks! ⅞"
 
     async def test_send_string(self, bot, chat_id):
         # We test this here and not at the respective test modules because it's not worth
         # duplicating the test for the different methods
         message = await bot.send_document(
-            chat_id, InputFile(data_file("text_file.txt").read_text(encoding="utf-8"))
+            chat_id, InputFile(data_file("text_file.txt").read_text(encoding=TextEncoding.UTF_8))
         )
         out = BytesIO()
 
         await (await message.document.get_file()).download_to_memory(out=out)
         out.seek(0)
 
-        assert out.read().decode("utf-8") == "PTB Rocks! ⅞"
+        assert out.read().decode(TextEncoding.UTF_8) == "PTB Rocks! ⅞"
diff --git a/tests/auxil/ci_bots.py b/tests/auxil/ci_bots.py
@@ -22,6 +22,8 @@
 import os
 import random
 
+from telegram._utils.strings import TextEncoding
+
 # Provide some public fallbacks so it's easy for contributors to run tests on their local machine
 # These bots are only able to talk in our test chats, so they are quite useless for other
 # purposes than testing.
@@ -42,10 +44,12 @@
 BOTS = os.getenv("BOTS", None)
 JOB_INDEX = os.getenv("JOB_INDEX", None)
 if GITHUB_ACTION is not None and BOTS is not None and JOB_INDEX is not None:
-    BOTS = json.loads(base64.b64decode(BOTS).decode("utf-8"))
+    BOTS = json.loads(base64.b64decode(BOTS).decode(TextEncoding.UTF_8))
     JOB_INDEX = int(JOB_INDEX)
 
-FALLBACKS = json.loads(base64.b64decode(FALLBACKS).decode("utf-8"))  # type: list[dict[str, str]]
+FALLBACKS = json.loads(
+    base64.b64decode(FALLBACKS).decode(TextEncoding.UTF_8)
+)  # type: list[dict[str, str]]
 
 
 class BotInfoProvider:

diff --git a/tests/auxil/networking.py b/tests/auxil/networking.py
@@ -23,6 +23,7 @@
 from httpx import AsyncClient, AsyncHTTPTransport, Response
 
 from telegram._utils.defaultvalue import DEFAULT_NONE
+from telegram._utils.strings import TextEncoding
 from telegram._utils.types import ODVInput
 from telegram.error import BadRequest, RetryAfter, TimedOut
 from telegram.request import HTTPXRequest, RequestData
@@ -103,7 +104,7 @@ async def send_webhook_message(
         content_len = None
         payload = None
     else:
-        payload = bytes(payload_str, encoding="utf-8")
+        payload = bytes(payload_str, encoding=TextEncoding.UTF_8)
 
     if content_len == -1:
         content_len = len(payload)

diff --git a/tests/request/test_request.py b/tests/request/test_request.py
@@ -31,6 +31,7 @@
 from httpx import AsyncHTTPTransport
 
 from telegram._utils.defaultvalue import DEFAULT_NONE
+from telegram._utils.strings import TextEncoding
 from telegram.error import (
     BadRequest,
     ChatMigrated,
@@ -247,7 +248,7 @@ async def test_error_description(self, monkeypatch, httpx_request: HTTPXRequest,
         else:
             match = "Unknown HTTPError"
 
-        server_response = json.dumps(response_data).encode("utf-8")
+        server_response = json.dumps(response_data).encode(TextEncoding.UTF_8)
 
         monkeypatch.setattr(
             httpx_request,

diff --git a/tests/test_enum_types.py b/tests/test_enum_types.py
@@ -19,6 +19,8 @@
 import re
 from pathlib import Path
 
+from telegram._utils.strings import TextEncoding
+
 telegram_root = Path(__file__).parent.parent / "telegram"
 telegram_ext_root = telegram_root / "ext"
 exclude_dirs = {
@@ -46,7 +48,7 @@ def test_types_are_converted_to_enum():
             # We don't check tg.ext.
             continue
 
-        text = path.read_text(encoding="utf-8")
+        text = path.read_text(encoding=TextEncoding.UTF_8)
         for match in re.finditer(pattern, text):
             if any(exclude_pattern.match(match.group(0)) for exclude_pattern in exclude_patterns):
                 continue