diff --git a/AUTHORS.rst b/AUTHORS.rst index fe69dba625d..7477c888ab9 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -96,6 +96,7 @@ The following wonderful people contributed directly or indirectly to this projec - `Oleg Sushchenko `_ - `Or Bin `_ - `overquota `_ +- `Pablo Martinez `_ - `Paradox `_ - `Patrick Hofmann `_ - `Paul Larsen `_ diff --git a/telegram/_files/inputfile.py b/telegram/_files/inputfile.py index 994135bb5dd..9a07f6d65fa 100644 --- a/telegram/_files/inputfile.py +++ b/telegram/_files/inputfile.py @@ -23,6 +23,7 @@ from uuid import uuid4 from telegram._utils.files import load_file +from telegram._utils.strings import TextEncoding from telegram._utils.types import FieldTuple _DEFAULT_MIME_TYPE = "application/octet-stream" @@ -74,7 +75,7 @@ def __init__( if isinstance(obj, bytes): self.input_file_content: bytes = obj elif isinstance(obj, str): - self.input_file_content = obj.encode("utf-8") + self.input_file_content = obj.encode(TextEncoding.UTF_8) else: reported_filename, self.input_file_content = load_file(obj) filename = filename or reported_filename diff --git a/telegram/_games/game.py b/telegram/_games/game.py index 93b3f0161cc..1a25d1ad538 100644 --- a/telegram/_games/game.py +++ b/telegram/_games/game.py @@ -24,6 +24,7 @@ from telegram._messageentity import MessageEntity from telegram._telegramobject import TelegramObject from telegram._utils.argumentparsing import parse_sequence_arg +from telegram._utils.strings import TextEncoding from telegram._utils.types import JSONDict if TYPE_CHECKING: @@ -157,10 +158,10 @@ def parse_text_entity(self, entity: MessageEntity) -> str: if not self.text: raise RuntimeError("This Game has no 'text'.") - entity_text = self.text.encode("utf-16-le") + entity_text = self.text.encode(TextEncoding.UTF_16_LE) entity_text = entity_text[entity.offset * 2 : (entity.offset + entity.length) * 2] - return entity_text.decode("utf-16-le") + return entity_text.decode(TextEncoding.UTF_16_LE) def parse_text_entities(self, types: Optional[List[str]] = None) -> Dict[MessageEntity, str]: """ diff --git a/telegram/_message.py b/telegram/_message.py index fceb8cb8768..a705dc65934 100644 --- a/telegram/_message.py +++ b/telegram/_message.py @@ -68,6 +68,7 @@ from telegram._utils.datetime import extract_tzinfo_from_defaults, from_timestamp from telegram._utils.defaultvalue import DEFAULT_NONE, DefaultValue from telegram._utils.entities import parse_message_entities, parse_message_entity +from telegram._utils.strings import TextEncoding from telegram._utils.types import ( CorrectOptionID, FileInput, @@ -1516,8 +1517,8 @@ def compute_quote_position_and_entities( raise RuntimeError("This message has neither text nor caption.") # Telegram wants the position in UTF-16 code units, so we have to calculate in that space - utf16_text = text.encode("utf-16-le") - utf16_quote = quote.encode("utf-16-le") + utf16_text = text.encode(TextEncoding.UTF_16_LE) + utf16_quote = quote.encode(TextEncoding.UTF_16_LE) effective_index = index or 0 matches = list(re.finditer(re.escape(utf16_quote), utf16_text)) @@ -4479,7 +4480,7 @@ def _parse_html( if message_text is None: return None - utf_16_text = message_text.encode("utf-16-le") + utf_16_text = message_text.encode(TextEncoding.UTF_16_LE) html_text = "" last_offset = 0 @@ -4543,7 +4544,9 @@ def _parse_html( # text is part of the parent entity html_text += ( escape( - utf_16_text[last_offset * 2 : (entity.offset - offset) * 2].decode("utf-16-le") + utf_16_text[last_offset * 2 : (entity.offset - offset) * 2].decode( + TextEncoding.UTF_16_LE + ) ) + insert ) @@ -4551,7 +4554,7 @@ def _parse_html( last_offset = entity.offset - offset + entity.length # see comment above - html_text += escape(utf_16_text[last_offset * 2 :].decode("utf-16-le")) + html_text += escape(utf_16_text[last_offset * 2 :].decode(TextEncoding.UTF_16_LE)) return html_text @@ -4680,7 +4683,7 @@ def _parse_markdown( if message_text is None: return None - utf_16_text = message_text.encode("utf-16-le") + utf_16_text = message_text.encode(TextEncoding.UTF_16_LE) markdown_text = "" last_offset = 0 @@ -4773,7 +4776,7 @@ def _parse_markdown( markdown_text += ( escape_markdown( utf_16_text[last_offset * 2 : (entity.offset - offset) * 2].decode( - "utf-16-le" + TextEncoding.UTF_16_LE ), version=version, ) @@ -4784,7 +4787,7 @@ def _parse_markdown( # see comment above markdown_text += escape_markdown( - utf_16_text[last_offset * 2 :].decode("utf-16-le"), + utf_16_text[last_offset * 2 :].decode(TextEncoding.UTF_16_LE), version=version, ) diff --git a/telegram/_messageentity.py b/telegram/_messageentity.py index 302f3a1c080..6e219537fae 100644 --- a/telegram/_messageentity.py +++ b/telegram/_messageentity.py @@ -26,6 +26,7 @@ from telegram._telegramobject import TelegramObject from telegram._user import User from telegram._utils import enum +from telegram._utils.strings import TextEncoding from telegram._utils.types import JSONDict if TYPE_CHECKING: @@ -203,7 +204,7 @@ def adjust_message_entities_to_utf_16( for i, position in enumerate(positions): last_position = positions[i - 1] if i > 0 else 0 text_slice = text[last_position:position] - accumulated_length += len(text_slice.encode("utf-16-le")) // 2 + accumulated_length += len(text_slice.encode(TextEncoding.UTF_16_LE)) // 2 position_translation[position] = accumulated_length # get the final output entites out = [] diff --git a/telegram/_passport/credentials.py b/telegram/_passport/credentials.py index 514f7fffb6c..fab0b6eb2c8 100644 --- a/telegram/_passport/credentials.py +++ b/telegram/_passport/credentials.py @@ -39,6 +39,7 @@ from telegram._telegramobject import TelegramObject from telegram._utils.argumentparsing import parse_sequence_arg +from telegram._utils.strings import TextEncoding from telegram._utils.types import JSONDict from telegram.error import PassportDecryptionError @@ -98,7 +99,7 @@ def decrypt(secret, hash, data): @no_type_check def decrypt_json(secret, hash, data): """Decrypts data using secret and hash and then decodes utf-8 string and loads json""" - return json.loads(decrypt(secret, hash, data).decode("utf-8")) + return json.loads(decrypt(secret, hash, data).decode(TextEncoding.UTF_8)) class EncryptedCredentials(TelegramObject): diff --git a/telegram/_utils/entities.py b/telegram/_utils/entities.py index a3994cd0426..34901c3d6f7 100644 --- a/telegram/_utils/entities.py +++ b/telegram/_utils/entities.py @@ -26,6 +26,7 @@ from typing import Dict, Optional, Sequence from telegram._messageentity import MessageEntity +from telegram._utils.strings import TextEncoding def parse_message_entity(text: str, entity: MessageEntity) -> str: @@ -38,10 +39,10 @@ def parse_message_entity(text: str, entity: MessageEntity) -> str: Returns: :obj:`str`: The text of the given entity. """ - entity_text = text.encode("utf-16-le") + entity_text = text.encode(TextEncoding.UTF_16_LE) entity_text = entity_text[entity.offset * 2 : (entity.offset + entity.length) * 2] - return entity_text.decode("utf-16-le") + return entity_text.decode(TextEncoding.UTF_16_LE) def parse_message_entities( diff --git a/telegram/_utils/strings.py b/telegram/_utils/strings.py index dc044e86420..c57e4e48b32 100644 --- a/telegram/_utils/strings.py +++ b/telegram/_utils/strings.py @@ -24,6 +24,23 @@ the changelog. """ +from telegram._utils.enum import StringEnum + +# TODO: Remove this when https://github.com/PyCQA/pylint/issues/6887 is resolved. +# pylint: disable=invalid-enum-extension,invalid-slots + + +class TextEncoding(StringEnum): + """This enum contains encoding schemes for text. + + .. versionadded:: NEXT.VERSION + """ + + __slots__ = () + + UTF_8 = "utf-8" + UTF_16_LE = "utf-16-le" + def to_camel_case(snake_str: str) -> str: """Converts a snake_case string to camelCase. diff --git a/telegram/request/_baserequest.py b/telegram/request/_baserequest.py index 93024d6c4d0..d323dfb0620 100644 --- a/telegram/request/_baserequest.py +++ b/telegram/request/_baserequest.py @@ -26,6 +26,7 @@ from telegram._utils.defaultvalue import DEFAULT_NONE as _DEFAULT_NONE from telegram._utils.defaultvalue import DefaultValue from telegram._utils.logging import get_logger +from telegram._utils.strings import TextEncoding from telegram._utils.types import JSONDict, ODVInput from telegram._utils.warnings import warn from telegram._version import __version__ as ptb_ver @@ -403,7 +404,7 @@ def parse_json_payload(payload: bytes) -> JSONDict: Raises: TelegramError: If loading the JSON data failed """ - decoded_s = payload.decode("utf-8", "replace") + decoded_s = payload.decode(TextEncoding.UTF_8, "replace") try: return json.loads(decoded_s) except ValueError as exc: diff --git a/telegram/request/_requestdata.py b/telegram/request/_requestdata.py index 658a445649d..1b5b5446d5e 100644 --- a/telegram/request/_requestdata.py +++ b/telegram/request/_requestdata.py @@ -21,6 +21,7 @@ from typing import Any, Dict, List, Optional, Union, final from urllib.parse import urlencode +from telegram._utils.strings import TextEncoding from telegram._utils.types import UploadFileDict from telegram.request._requestparameter import RequestParameter @@ -109,7 +110,7 @@ def json_payload(self) -> bytes: To use a custom library for JSON encoding, you can directly encode the keys of :attr:`parameters` - note that string valued keys should not be JSON encoded. """ - return json.dumps(self.json_parameters).encode("utf-8") + return json.dumps(self.json_parameters).encode(TextEncoding.UTF_8) @property def multipart_data(self) -> UploadFileDict: diff --git a/tests/_files/test_inputfile.py b/tests/_files/test_inputfile.py index 2a2a3b60734..1f70cb5ccda 100644 --- a/tests/_files/test_inputfile.py +++ b/tests/_files/test_inputfile.py @@ -24,6 +24,7 @@ import pytest from telegram import InputFile +from telegram._utils.strings import TextEncoding from tests.auxil.files import data_file from tests.auxil.slots import mro_slots @@ -150,17 +151,17 @@ async def test_send_bytes(self, bot, chat_id): await (await message.document.get_file()).download_to_memory(out=out) out.seek(0) - assert out.read().decode("utf-8") == "PTB Rocks! ⅞" + assert out.read().decode(TextEncoding.UTF_8) == "PTB Rocks! ⅞" async def test_send_string(self, bot, chat_id): # We test this here and not at the respective test modules because it's not worth # duplicating the test for the different methods message = await bot.send_document( - chat_id, InputFile(data_file("text_file.txt").read_text(encoding="utf-8")) + chat_id, InputFile(data_file("text_file.txt").read_text(encoding=TextEncoding.UTF_8)) ) out = BytesIO() await (await message.document.get_file()).download_to_memory(out=out) out.seek(0) - assert out.read().decode("utf-8") == "PTB Rocks! ⅞" + assert out.read().decode(TextEncoding.UTF_8) == "PTB Rocks! ⅞" diff --git a/tests/auxil/ci_bots.py b/tests/auxil/ci_bots.py index bfad962b811..bdb25a2f0ee 100644 --- a/tests/auxil/ci_bots.py +++ b/tests/auxil/ci_bots.py @@ -22,6 +22,8 @@ import os import random +from telegram._utils.strings import TextEncoding + # Provide some public fallbacks so it's easy for contributors to run tests on their local machine # These bots are only able to talk in our test chats, so they are quite useless for other # purposes than testing. @@ -42,10 +44,12 @@ BOTS = os.getenv("BOTS", None) JOB_INDEX = os.getenv("JOB_INDEX", None) if GITHUB_ACTION is not None and BOTS is not None and JOB_INDEX is not None: - BOTS = json.loads(base64.b64decode(BOTS).decode("utf-8")) + BOTS = json.loads(base64.b64decode(BOTS).decode(TextEncoding.UTF_8)) JOB_INDEX = int(JOB_INDEX) -FALLBACKS = json.loads(base64.b64decode(FALLBACKS).decode("utf-8")) # type: list[dict[str, str]] +FALLBACKS = json.loads( + base64.b64decode(FALLBACKS).decode(TextEncoding.UTF_8) +) # type: list[dict[str, str]] class BotInfoProvider: diff --git a/tests/auxil/networking.py b/tests/auxil/networking.py index 2284f31fc50..7c20da7ac94 100644 --- a/tests/auxil/networking.py +++ b/tests/auxil/networking.py @@ -23,6 +23,7 @@ from httpx import AsyncClient, AsyncHTTPTransport, Response from telegram._utils.defaultvalue import DEFAULT_NONE +from telegram._utils.strings import TextEncoding from telegram._utils.types import ODVInput from telegram.error import BadRequest, RetryAfter, TimedOut from telegram.request import HTTPXRequest, RequestData @@ -103,7 +104,7 @@ async def send_webhook_message( content_len = None payload = None else: - payload = bytes(payload_str, encoding="utf-8") + payload = bytes(payload_str, encoding=TextEncoding.UTF_8) if content_len == -1: content_len = len(payload) diff --git a/tests/request/test_request.py b/tests/request/test_request.py index 0f664cbdbcf..55100940b18 100644 --- a/tests/request/test_request.py +++ b/tests/request/test_request.py @@ -31,6 +31,7 @@ from httpx import AsyncHTTPTransport from telegram._utils.defaultvalue import DEFAULT_NONE +from telegram._utils.strings import TextEncoding from telegram.error import ( BadRequest, ChatMigrated, @@ -247,7 +248,7 @@ async def test_error_description(self, monkeypatch, httpx_request: HTTPXRequest, else: match = "Unknown HTTPError" - server_response = json.dumps(response_data).encode("utf-8") + server_response = json.dumps(response_data).encode(TextEncoding.UTF_8) monkeypatch.setattr( httpx_request, diff --git a/tests/test_enum_types.py b/tests/test_enum_types.py index b16002c6642..947d5fd0655 100644 --- a/tests/test_enum_types.py +++ b/tests/test_enum_types.py @@ -19,6 +19,8 @@ import re from pathlib import Path +from telegram._utils.strings import TextEncoding + telegram_root = Path(__file__).parent.parent / "telegram" telegram_ext_root = telegram_root / "ext" exclude_dirs = { @@ -46,7 +48,7 @@ def test_types_are_converted_to_enum(): # We don't check tg.ext. continue - text = path.read_text(encoding="utf-8") + text = path.read_text(encoding=TextEncoding.UTF_8) for match in re.finditer(pattern, text): if any(exclude_pattern.match(match.group(0)) for exclude_pattern in exclude_patterns): continue