Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Add MessageEntity.shift_entities and MessageEntity.concatenate #4376

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Sep 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 142 additions & 5 deletions telegram/_messageentity.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

import copy
import itertools
from typing import TYPE_CHECKING, Dict, Final, List, Optional, Sequence
from typing import TYPE_CHECKING, Dict, Final, List, Optional, Sequence, Tuple, Union

from telegram import constants
from telegram._telegramobject import TelegramObject
Expand All @@ -32,6 +32,8 @@
if TYPE_CHECKING:
from telegram import Bot

_SEM = Sequence["MessageEntity"]


class MessageEntity(TelegramObject):
"""
Expand Down Expand Up @@ -146,9 +148,7 @@ def de_json(
return super().de_json(data=data, bot=bot)

@staticmethod
def adjust_message_entities_to_utf_16(
text: str, entities: Sequence["MessageEntity"]
) -> Sequence["MessageEntity"]:
def adjust_message_entities_to_utf_16(text: str, entities: _SEM) -> _SEM:
"""Utility functionality for converting the offset and length of entities from
Unicode (:obj:`str`) to UTF-16 (``utf-16-le`` encoded :obj:`bytes`).

Expand Down Expand Up @@ -206,7 +206,7 @@ def adjust_message_entities_to_utf_16(
text_slice = text[last_position:position]
accumulated_length += len(text_slice.encode(TextEncoding.UTF_16_LE)) // 2
position_translation[position] = accumulated_length
# get the final output entites
# get the final output entities
out = []
for entity in entities:
translated_positions = position_translation[entity.offset]
Expand All @@ -220,6 +220,143 @@ def adjust_message_entities_to_utf_16(
out.append(new_entity)
return out

@staticmethod
def shift_entities(by: Union[str, int], entities: _SEM) -> _SEM:
"""Utility functionality for shifting the offset of entities by a given amount.

Examples:
Shifting by an integer amount:

.. code-block:: python

text = "Hello, world!"
entities = [
MessageEntity(offset=0, length=5, type=MessageEntity.BOLD),
MessageEntity(offset=7, length=5, type=MessageEntity.ITALIC),
]
shifted_entities = MessageEntity.shift_entities(1, entities)
await bot.send_message(
chat_id=123,
text="!" + text,
entities=shifted_entities,
)

Shifting using a string:

.. code-block:: python

text = "Hello, world!"
prefix = "𝄒"
entities = [
MessageEntity(offset=0, length=5, type=MessageEntity.BOLD),
MessageEntity(offset=7, length=5, type=MessageEntity.ITALIC),
]
shifted_entities = MessageEntity.shift_entities(prefix, entities)
await bot.send_message(
chat_id=123,
text=prefix + text,
entities=shifted_entities,
)

Tip:
The :paramref:`entities` are *not* modified in place. The function returns a sequence
of new objects.

.. versionadded:: NEXT.VERSION

Args:
by (:obj:`str` | :obj:`int`): Either the amount to shift the offset by or
a string whose length will be used as the amount to shift the offset by. In this
case, UTF-16 encoding will be used to calculate the length.
entities (Sequence[:class:`telegram.MessageEntity`]): Sequence of entities
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
entities (Sequence[:class:`telegram.MessageEntity`]): Sequence of entities
entities (Sequence[:class:`telegram.MessageEntity`]): Sequence of entities whose :paramref:`~telegram.MessageEntity.offset` will be shifted :param:`by`.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't that be clear by the description? 🤔 it would at least have to be "by parameter by" ...

Copy link
Member

@harshil21 harshil21 Aug 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

well yeah, but the description of the parameter just looked a bit too short. I'm fine with keeping as is then


Returns:
Sequence[:class:`telegram.MessageEntity`]: Sequence of entities with the offset shifted
"""
effective_shift = by if isinstance(by, int) else len(by.encode("utf-16-le")) // 2

out = []
for entity in entities:
new_entity = copy.copy(entity)
with new_entity._unfrozen():
new_entity.offset += effective_shift
out.append(new_entity)
return out

@classmethod
def concatenate(
    cls,
    *args: Union[Tuple[str, _SEM], Tuple[str, _SEM, bool]],
) -> Tuple[str, _SEM]:
    """Utility functionality for concatenating an arbitrary number of texts along with their
    formatting entities.

    Tip:
        This function is useful for prefixing an already formatted text with a new text and its
        formatting entities. In particular, it automatically correctly handles UTF-16 encoding.

    Examples:
        This example shows a callback function that can be used to add a prefix and suffix to
        the message in a :class:`~telegram.ext.CallbackQueryHandler`:

        .. code-block:: python

            async def prefix_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
                prefix = "𠌕 bold 𝄒 italic underlined: 𝛙𝌢𝑁 | "
                prefix_entities = [
                    MessageEntity(offset=2, length=4, type=MessageEntity.BOLD),
                    MessageEntity(offset=9, length=6, type=MessageEntity.ITALIC),
                    MessageEntity(offset=28, length=3, type=MessageEntity.UNDERLINE),
                ]
                suffix = " | 𠌕 bold 𝄒 italic underlined: 𝛙𝌢𝑁"
                suffix_entities = [
                    MessageEntity(offset=5, length=4, type=MessageEntity.BOLD),
                    MessageEntity(offset=12, length=6, type=MessageEntity.ITALIC),
                    MessageEntity(offset=31, length=3, type=MessageEntity.UNDERLINE),
                ]

                message = update.effective_message
                first = (prefix, prefix_entities, True)
                second = (message.text, message.entities)
                third = (suffix, suffix_entities, True)

                new_text, new_entities = MessageEntity.concatenate(first, second, third)
                await update.callback_query.edit_message_text(
                    text=new_text,
                    entities=new_entities,
                )

    Hint:
        The entities are *not* modified in place. The function returns a
        new sequence of objects.

    .. versionadded:: NEXT.VERSION

    Args:
        *args (Tuple[:obj:`str`, Sequence[:class:`telegram.MessageEntity`]] | \
            Tuple[:obj:`str`, Sequence[:class:`telegram.MessageEntity`], :obj:`bool`]):
            Arbitrary number of tuples containing the text and its entities to concatenate.
            If the last element of the tuple is a :obj:`bool`, it is used to determine whether
            to adjust the entities to UTF-16 via
            :meth:`adjust_message_entities_to_utf_16`. UTF-16 adjustment is disabled by
            default.

    Returns:
        Tuple[:obj:`str`, Sequence[:class:`telegram.MessageEntity`]]: The concatenated text
        and its entities
    """
    text_parts: List[str] = []
    output_entities: List[MessageEntity] = []
    # Length, in UTF-16 code units, of all text collected so far. Tracking it as a running
    # total avoids re-encoding the whole accumulated text for every segment.
    utf_16_offset = 0

    for arg in args:
        text, entities = arg[0], arg[1]

        # The optional third tuple element must be exactly ``True`` to request the
        # conversion of the entities from Unicode to UTF-16 positions.
        if len(arg) > 2 and arg[2] is True:
            entities = cls.adjust_message_entities_to_utf_16(text, entities)

        # shift_entities accepts an integer amount and returns new entity objects
        output_entities.extend(cls.shift_entities(utf_16_offset, entities))

        text_parts.append(text)
        # Same length computation that shift_entities applies to a string argument
        utf_16_offset += len(text.encode("utf-16-le")) // 2

    return "".join(text_parts), output_entities

ALL_TYPES: Final[List[str]] = list(constants.MessageEntityType)
"""List[:obj:`str`]: A list of all available message entity types."""
BLOCKQUOTE: Final[str] = constants.MessageEntityType.BLOCKQUOTE
Expand Down
48 changes: 48 additions & 0 deletions tests/test_messageentity.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,54 @@ def test_fix_utf16(self):
assert out_entity.offset == offset
assert out_entity.length == length

# The last parameter consists of three characters outside the Basic Multilingual Plane.
# Each of them takes two UTF-16 code units, so every parameter amounts to a shift of 6.
@pytest.mark.parametrize("by", [6, "prefix", "𝛙𝌢𝑁"])
def test_shift_entities(self, by):
    """Check that ``shift_entities`` moves every offset by 6 for an integer, an ASCII string
    and a string of astral characters, returns new objects and keeps all other attributes.
    """
    kwargs = {
        "url": "url",
        "user": 42,
        "language": "python",
        "custom_emoji_id": "custom_emoji_id",
    }
    entities = [
        MessageEntity(MessageEntity.BOLD, 2, 3, **kwargs),
        MessageEntity(MessageEntity.BOLD, 5, 6, **kwargs),
    ]
    shifted = MessageEntity.shift_entities(by, entities)
    assert shifted[0].offset == 8
    assert shifted[1].offset == 11

    # The input entities must not be touched: new objects are returned ...
    assert shifted[0] is not entities[0]
    assert shifted[1] is not entities[1]
    # ... and the originals keep their offsets
    assert entities[0].offset == 2
    assert entities[1].offset == 5

    for entity in shifted:
        for key, value in kwargs.items():
            assert getattr(entity, key) == value

def test_concatenate(self):
    """Check that ``concatenate`` joins the texts and shifts each segment's entities by the
    UTF-16 length of the text preceding it, with and without the optional adjustment flag.
    """
    kwargs = {
        "url": "url",
        "user": 42,
        "language": "python",
        "custom_emoji_id": "custom_emoji_id",
    }
    first_entity = MessageEntity(MessageEntity.BOLD, 0, 6, **kwargs)
    second_entity = MessageEntity(MessageEntity.ITALIC, 0, 4, **kwargs)
    third_entity = MessageEntity(MessageEntity.UNDERLINE, 3, 6, **kwargs)

    # "𝛙𝌢𝑁" are three characters outside the Basic Multilingual Plane, i.e. 6 UTF-16 code
    # units. The first text is therefore 16 units long and the second one 11 units.
    first = ("prefix 𝛙𝌢𝑁 | ", [first_entity], True)
    second = ("text 𝛙𝌢𝑁", [second_entity], False)
    third = (" | suffix 𝛙𝌢𝑁", [third_entity])

    new_text, new_entities = MessageEntity.concatenate(first, second, third)

    assert new_text == "prefix 𝛙𝌢𝑁 | text 𝛙𝌢𝑁 | suffix 𝛙𝌢𝑁"
    # Expected offsets: 0, 0 + 16 and 3 + 16 + 11
    assert [entity.offset for entity in new_entities] == [0, 16, 30]
    for old, new in zip([first_entity, second_entity, third_entity], new_entities):
        assert new is not old
        assert new.type == old.type
        for key, value in kwargs.items():
            assert getattr(new, key) == value

def test_equality(self):
a = MessageEntity(MessageEntity.BOLD, 2, 3)
b = MessageEntity(MessageEntity.BOLD, 2, 3)
Expand Down
Loading