Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

[enhancement] move hardcoded encodings to constants #4378

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ The following wonderful people contributed directly or indirectly to this projec
- `Oleg Sushchenko <https://github.com/feuillemorte>`_
- `Or Bin <https://github.com/OrBin>`_
- `overquota <https://github.com/overquota>`_
- `Pablo Martinez <https://github.com/elpekenin>`_
- `Paradox <https://github.com/paradox70>`_
- `Patrick Hofmann <https://github.com/PH89>`_
- `Paul Larsen <https://github.com/PaulSonOfLars>`_
Expand Down
3 changes: 2 additions & 1 deletion telegram/_files/inputfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from uuid import uuid4

from telegram._utils.files import load_file
from telegram._utils.strings import TextEncoding
from telegram._utils.types import FieldTuple

_DEFAULT_MIME_TYPE = "application/octet-stream"
Expand Down Expand Up @@ -74,7 +75,7 @@ def __init__(
if isinstance(obj, bytes):
self.input_file_content: bytes = obj
elif isinstance(obj, str):
self.input_file_content = obj.encode("utf-8")
self.input_file_content = obj.encode(TextEncoding.UTF_8)
else:
reported_filename, self.input_file_content = load_file(obj)
filename = filename or reported_filename
Expand Down
5 changes: 3 additions & 2 deletions telegram/_games/game.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from telegram._messageentity import MessageEntity
from telegram._telegramobject import TelegramObject
from telegram._utils.argumentparsing import parse_sequence_arg
from telegram._utils.strings import TextEncoding
from telegram._utils.types import JSONDict

if TYPE_CHECKING:
Expand Down Expand Up @@ -157,10 +158,10 @@ def parse_text_entity(self, entity: MessageEntity) -> str:
if not self.text:
raise RuntimeError("This Game has no 'text'.")

entity_text = self.text.encode("utf-16-le")
entity_text = self.text.encode(TextEncoding.UTF_16_LE)
entity_text = entity_text[entity.offset * 2 : (entity.offset + entity.length) * 2]

return entity_text.decode("utf-16-le")
return entity_text.decode(TextEncoding.UTF_16_LE)

def parse_text_entities(self, types: Optional[List[str]] = None) -> Dict[MessageEntity, str]:
"""
Expand Down
19 changes: 11 additions & 8 deletions telegram/_message.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
from telegram._utils.datetime import extract_tzinfo_from_defaults, from_timestamp
from telegram._utils.defaultvalue import DEFAULT_NONE, DefaultValue
from telegram._utils.entities import parse_message_entities, parse_message_entity
from telegram._utils.strings import TextEncoding
from telegram._utils.types import (
CorrectOptionID,
FileInput,
Expand Down Expand Up @@ -1516,8 +1517,8 @@ def compute_quote_position_and_entities(
raise RuntimeError("This message has neither text nor caption.")

# Telegram wants the position in UTF-16 code units, so we have to calculate in that space
utf16_text = text.encode("utf-16-le")
utf16_quote = quote.encode("utf-16-le")
utf16_text = text.encode(TextEncoding.UTF_16_LE)
utf16_quote = quote.encode(TextEncoding.UTF_16_LE)
effective_index = index or 0

matches = list(re.finditer(re.escape(utf16_quote), utf16_text))
Expand Down Expand Up @@ -4479,7 +4480,7 @@ def _parse_html(
if message_text is None:
return None

utf_16_text = message_text.encode("utf-16-le")
utf_16_text = message_text.encode(TextEncoding.UTF_16_LE)
html_text = ""
last_offset = 0

Expand Down Expand Up @@ -4543,15 +4544,17 @@ def _parse_html(
# text is part of the parent entity
html_text += (
escape(
utf_16_text[last_offset * 2 : (entity.offset - offset) * 2].decode("utf-16-le")
utf_16_text[last_offset * 2 : (entity.offset - offset) * 2].decode(
TextEncoding.UTF_16_LE
)
)
+ insert
)

last_offset = entity.offset - offset + entity.length

# see comment above
html_text += escape(utf_16_text[last_offset * 2 :].decode("utf-16-le"))
html_text += escape(utf_16_text[last_offset * 2 :].decode(TextEncoding.UTF_16_LE))

return html_text

Expand Down Expand Up @@ -4680,7 +4683,7 @@ def _parse_markdown(
if message_text is None:
return None

utf_16_text = message_text.encode("utf-16-le")
utf_16_text = message_text.encode(TextEncoding.UTF_16_LE)
markdown_text = ""
last_offset = 0

Expand Down Expand Up @@ -4773,7 +4776,7 @@ def _parse_markdown(
markdown_text += (
escape_markdown(
utf_16_text[last_offset * 2 : (entity.offset - offset) * 2].decode(
"utf-16-le"
TextEncoding.UTF_16_LE
),
version=version,
)
Expand All @@ -4784,7 +4787,7 @@ def _parse_markdown(

# see comment above
markdown_text += escape_markdown(
utf_16_text[last_offset * 2 :].decode("utf-16-le"),
utf_16_text[last_offset * 2 :].decode(TextEncoding.UTF_16_LE),
version=version,
)

Expand Down
3 changes: 2 additions & 1 deletion telegram/_messageentity.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from telegram._telegramobject import TelegramObject
from telegram._user import User
from telegram._utils import enum
from telegram._utils.strings import TextEncoding
from telegram._utils.types import JSONDict

if TYPE_CHECKING:
Expand Down Expand Up @@ -203,7 +204,7 @@ def adjust_message_entities_to_utf_16(
for i, position in enumerate(positions):
last_position = positions[i - 1] if i > 0 else 0
text_slice = text[last_position:position]
accumulated_length += len(text_slice.encode("utf-16-le")) // 2
accumulated_length += len(text_slice.encode(TextEncoding.UTF_16_LE)) // 2
position_translation[position] = accumulated_length
# get the final output entities
out = []
Expand Down
3 changes: 2 additions & 1 deletion telegram/_passport/credentials.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@

from telegram._telegramobject import TelegramObject
from telegram._utils.argumentparsing import parse_sequence_arg
from telegram._utils.strings import TextEncoding
from telegram._utils.types import JSONDict
from telegram.error import PassportDecryptionError

Expand Down Expand Up @@ -98,7 +99,7 @@ def decrypt(secret, hash, data):
@no_type_check
def decrypt_json(secret, hash, data):
"""Decrypts data using secret and hash and then decodes utf-8 string and loads json"""
return json.loads(decrypt(secret, hash, data).decode("utf-8"))
return json.loads(decrypt(secret, hash, data).decode(TextEncoding.UTF_8))


class EncryptedCredentials(TelegramObject):
Expand Down
5 changes: 3 additions & 2 deletions telegram/_utils/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from typing import Dict, Optional, Sequence

from telegram._messageentity import MessageEntity
from telegram._utils.strings import TextEncoding


def parse_message_entity(text: str, entity: MessageEntity) -> str:
Expand All @@ -38,10 +39,10 @@ def parse_message_entity(text: str, entity: MessageEntity) -> str:
Returns:
:obj:`str`: The text of the given entity.
"""
entity_text = text.encode("utf-16-le")
entity_text = text.encode(TextEncoding.UTF_16_LE)
entity_text = entity_text[entity.offset * 2 : (entity.offset + entity.length) * 2]

return entity_text.decode("utf-16-le")
return entity_text.decode(TextEncoding.UTF_16_LE)


def parse_message_entities(
Expand Down
17 changes: 17 additions & 0 deletions telegram/_utils/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,23 @@
the changelog.
"""

from telegram._utils.enum import StringEnum

# TODO: Remove this when https://github.com/PyCQA/pylint/issues/6887 is resolved.
# pylint: disable=invalid-enum-extension,invalid-slots


class TextEncoding(StringEnum):
"""This enum contains encoding schemes for text.

.. versionadded:: NEXT.VERSION
"""

# Empty ``__slots__``: this enum declares no instance attributes of its own.
__slots__ = ()

# Passed to ``str.encode`` / ``bytes.decode`` wherever JSON payloads or
# string file content are converted to or from bytes.
UTF_8 = "utf-8"
# Used for message-entity offset/length arithmetic: Telegram expects
# positions in UTF-16 code units, and callers treat each unit as two bytes
# in this encoding (hence their ``* 2`` / ``// 2`` index scaling).
UTF_16_LE = "utf-16-le"


def to_camel_case(snake_str: str) -> str:
"""Converts a snake_case string to camelCase.
Expand Down
3 changes: 2 additions & 1 deletion telegram/request/_baserequest.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from telegram._utils.defaultvalue import DEFAULT_NONE as _DEFAULT_NONE
from telegram._utils.defaultvalue import DefaultValue
from telegram._utils.logging import get_logger
from telegram._utils.strings import TextEncoding
from telegram._utils.types import JSONDict, ODVInput
from telegram._utils.warnings import warn
from telegram._version import __version__ as ptb_ver
Expand Down Expand Up @@ -403,7 +404,7 @@ def parse_json_payload(payload: bytes) -> JSONDict:
Raises:
TelegramError: If loading the JSON data failed
"""
decoded_s = payload.decode("utf-8", "replace")
decoded_s = payload.decode(TextEncoding.UTF_8, "replace")
try:
return json.loads(decoded_s)
except ValueError as exc:
Expand Down
3 changes: 2 additions & 1 deletion telegram/request/_requestdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from typing import Any, Dict, List, Optional, Union, final
from urllib.parse import urlencode

from telegram._utils.strings import TextEncoding
from telegram._utils.types import UploadFileDict
from telegram.request._requestparameter import RequestParameter

Expand Down Expand Up @@ -109,7 +110,7 @@ def json_payload(self) -> bytes:
To use a custom library for JSON encoding, you can directly encode the keys of
:attr:`parameters` - note that string valued keys should not be JSON encoded.
"""
return json.dumps(self.json_parameters).encode("utf-8")
return json.dumps(self.json_parameters).encode(TextEncoding.UTF_8)

@property
def multipart_data(self) -> UploadFileDict:
Expand Down
7 changes: 4 additions & 3 deletions tests/_files/test_inputfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import pytest

from telegram import InputFile
from telegram._utils.strings import TextEncoding
from tests.auxil.files import data_file
from tests.auxil.slots import mro_slots

Expand Down Expand Up @@ -150,17 +151,17 @@ async def test_send_bytes(self, bot, chat_id):
await (await message.document.get_file()).download_to_memory(out=out)
out.seek(0)

assert out.read().decode("utf-8") == "PTB Rocks! ⅞"
assert out.read().decode(TextEncoding.UTF_8) == "PTB Rocks! ⅞"

async def test_send_string(self, bot, chat_id):
# We test this here and not at the respective test modules because it's not worth
# duplicating the test for the different methods
message = await bot.send_document(
chat_id, InputFile(data_file("text_file.txt").read_text(encoding="utf-8"))
chat_id, InputFile(data_file("text_file.txt").read_text(encoding=TextEncoding.UTF_8))
)
out = BytesIO()

await (await message.document.get_file()).download_to_memory(out=out)
out.seek(0)

assert out.read().decode("utf-8") == "PTB Rocks! ⅞"
assert out.read().decode(TextEncoding.UTF_8) == "PTB Rocks! ⅞"
8 changes: 6 additions & 2 deletions tests/auxil/ci_bots.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
import os
import random

from telegram._utils.strings import TextEncoding

# Provide some public fallbacks so it's easy for contributors to run tests on their local machine
# These bots are only able to talk in our test chats, so they are quite useless for other
# purposes than testing.
Expand All @@ -42,10 +44,12 @@
BOTS = os.getenv("BOTS", None)
JOB_INDEX = os.getenv("JOB_INDEX", None)
if GITHUB_ACTION is not None and BOTS is not None and JOB_INDEX is not None:
BOTS = json.loads(base64.b64decode(BOTS).decode("utf-8"))
BOTS = json.loads(base64.b64decode(BOTS).decode(TextEncoding.UTF_8))
JOB_INDEX = int(JOB_INDEX)

FALLBACKS = json.loads(base64.b64decode(FALLBACKS).decode("utf-8")) # type: list[dict[str, str]]
FALLBACKS = json.loads(
base64.b64decode(FALLBACKS).decode(TextEncoding.UTF_8)
) # type: list[dict[str, str]]


class BotInfoProvider:
Expand Down
3 changes: 2 additions & 1 deletion tests/auxil/networking.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from httpx import AsyncClient, AsyncHTTPTransport, Response

from telegram._utils.defaultvalue import DEFAULT_NONE
from telegram._utils.strings import TextEncoding
from telegram._utils.types import ODVInput
from telegram.error import BadRequest, RetryAfter, TimedOut
from telegram.request import HTTPXRequest, RequestData
Expand Down Expand Up @@ -103,7 +104,7 @@ async def send_webhook_message(
content_len = None
payload = None
else:
payload = bytes(payload_str, encoding="utf-8")
payload = bytes(payload_str, encoding=TextEncoding.UTF_8)

if content_len == -1:
content_len = len(payload)
Expand Down
3 changes: 2 additions & 1 deletion tests/request/test_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from httpx import AsyncHTTPTransport

from telegram._utils.defaultvalue import DEFAULT_NONE
from telegram._utils.strings import TextEncoding
from telegram.error import (
BadRequest,
ChatMigrated,
Expand Down Expand Up @@ -247,7 +248,7 @@ async def test_error_description(self, monkeypatch, httpx_request: HTTPXRequest,
else:
match = "Unknown HTTPError"

server_response = json.dumps(response_data).encode("utf-8")
server_response = json.dumps(response_data).encode(TextEncoding.UTF_8)

monkeypatch.setattr(
httpx_request,
Expand Down
4 changes: 3 additions & 1 deletion tests/test_enum_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import re
from pathlib import Path

from telegram._utils.strings import TextEncoding

telegram_root = Path(__file__).parent.parent / "telegram"
telegram_ext_root = telegram_root / "ext"
exclude_dirs = {
Expand Down Expand Up @@ -46,7 +48,7 @@ def test_types_are_converted_to_enum():
# We don't check tg.ext.
continue

text = path.read_text(encoding="utf-8")
text = path.read_text(encoding=TextEncoding.UTF_8)
for match in re.finditer(pattern, text):
if any(exclude_pattern.match(match.group(0)) for exclude_pattern in exclude_patterns):
continue
Expand Down
Loading