-
-
Save chuv1/9641abc8a5a1a9b3bb8c9177fb7ffa9e to your computer and use it in GitHub Desktop.
/**
 * Render the message text as HTML markup based on its entities.
 *
 * Entity offsets and lengths are interpreted as UTF-16 code units, which is
 * the unit the Telegram Bot API documents for MessageEntity, so text that
 * contains emoji is sliced at the right positions. Text located before,
 * between and after entities is copied through unchanged.
 *
 * NOTE(review): neither the text nor the URLs are HTML-escaped; this matches
 * the previous behaviour of this method.
 *
 * @param bool $without_cmd Forwarded to getText() only when there are no entities.
 * @return string HTML markup, or the result of getText($without_cmd) when the
 *                message has no entities.
 */
public function getHTML($without_cmd = false){
    $entities = $this->getEntities();
    if(empty($entities)){
        return $this->getText($without_cmd);
    }
    $text = $this->getText();
    // Work on a UTF-16LE copy: every code unit is exactly two bytes, so an
    // entity offset maps to byte position offset * 2.
    $utf16 = mb_convert_encoding($text, 'UTF-16LE', 'UTF-8');
    $slice = static function($offset, $length = null) use ($utf16){
        $chunk = $length === null
            ? substr($utf16, $offset * 2)
            : substr($utf16, $offset * 2, $length * 2);
        // substr() may return false on older PHP versions when out of range.
        return mb_convert_encoding((string)$chunk, 'UTF-8', 'UTF-16LE');
    };
    $html = '';
    // Index (in UTF-16 units) of the first character not yet written.
    $cursor = 0;
    foreach($entities as $entity){
        $offset = $entity->getOffset();
        $length = $entity->getLength();
        // Plain text between the previous entity and this one.
        if($offset > $cursor){
            $html .= $slice($cursor, $offset - $cursor);
        }
        $chunk = $slice($offset, $length);
        switch($entity->getType()){
            case 'text_mention':
                $html .= '<a href="tg://user?id='.$entity->getUser()->getId().'">'.$chunk.'</a>';
                break;
            case 'text_link':
                $html .= '<a href="'.$entity->getUrl().'">'.$chunk.'</a>';
                break;
            case 'bold':
                $html .= '<b>'.$chunk.'</b>';
                break;
            case 'italic':
                $html .= '<i>'.$chunk.'</i>';
                break;
            case 'code':
                $html .= '<code>'.$chunk.'</code>';
                break;
            case 'pre':
                $html .= '<pre>'.$chunk.'</pre>';
                break;
            default:
                // mention, hashtag, cashtag, bot_command, url, email,
                // phone_number and any unknown type are emitted as plain text.
                $html .= $chunk;
                break;
        }
        $cursor = max($cursor, $offset + $length);
    }
    // Remaining text after the last entity.
    $html .= $slice($cursor);
    return $html;
}
Also, it won't work if the message contains emojis, because offsets are specified in UTF-16 code units. Here is a Ruby implementation that should handle all corner cases:
# Converts a message hash into HTML, working on UTF-16 bytes so that entity
# offsets (multiplied by two to get byte positions) line up with the text.
#
# Prints the rendered markup to stdout and returns the result as an array of
# UTF-16 bytes that starts with the byte order mark 254, 255.
def to_html(message)
  opening = {}
  closing = {}
  if message.key?("entities")
    entities = message["entities"]
    opening = entities.group_by { |entity| entity["offset"] * 2 }
    closing = entities.group_by { |entity| (entity["offset"] + entity["length"]) * 2 }
  end

  # UTF-16 bytes of a string with the leading two-byte BOM dropped.
  utf16_body = ->(str) { str.encode("UTF-16").bytes[2..] }

  source = utf16_body.call(String.new(message["text"]))
  out = []
  source.each_with_index do |byte, idx|
    opening.fetch(idx, []).each do |entity|
      out.push(*utf16_body.call(start_tag_to_text(entity, source)))
    end
    out << byte
    # Entities that end at the same position are closed in reverse list order.
    closing.fetch(idx + 1, []).reverse_each do |entity|
      out.push(*utf16_body.call(end_tag_to_text(entity)))
    end
  end

  # UTF-16 BOM
  out.unshift(254, 255)
  puts("=== RESULT ===")
  puts(out.pack("C*").force_encoding("UTF-16").encode("UTF-8"))
  out
end
# Returns the opening HTML tag for an entity, or "" for unsupported types.
#
# tag  - entity hash with "type", "offset", "length" and, for "text_link", "url".
# text - the message text, either as a String or as an array of UTF-16BE
#        bytes without a BOM (the form to_html passes in).
#
# For "mention" and "url" the link target is taken from the text the entity
# covers; a mention's leading "@" is removed.
def start_tag_to_text(tag, text)
  case tag["type"]
  when "bold" then "<b>"
  when "italic" then "<i>"
  when "underline" then "<u>"
  when "strikethrough" then "<s>"
  when "code" then "<code>"
  when "pre" then "<pre>"
  when "text_link"
    "<a href=\"#{tag["url"]}\">"
  when "mention"
    "<a href=\"https://t.me/#{entity_source_text(tag, text).delete_prefix("@")}\">"
  when "url"
    "<a href=\"#{entity_source_text(tag, text)}\">"
  else
    ""
  end
end

# Extracts the text an entity covers as a UTF-8 String.
#
# Entity offsets and lengths are counted in UTF-16 code units, so the slice is
# taken from the UTF-16BE byte sequence (two bytes per unit) and decoded back.
def entity_source_text(tag, text)
  units = text.respond_to?(:encode) ? text.encode("UTF-16BE").bytes : text
  slice = units[tag["offset"] * 2, tag["length"] * 2] || []
  slice.pack("C*").force_encoding("UTF-16BE").encode("UTF-8", invalid: :replace, undef: :replace)
end
# Returns the closing HTML tag that matches an entity's "type",
# or an empty string when the type has no associated tag.
def end_tag_to_text(tag)
  case tag["type"]
  when "bold" then "</b>"
  when "italic" then "</i>"
  when "underline" then "</u>"
  when "strikethrough" then "</s>"
  when "code" then "</code>"
  when "pre" then "</pre>"
  when "text_link", "mention", "url" then "</a>"
  else ""
  end
end
I was looking for a javascript implementation of this code. Thanks to your example I was able to develop a basic version. Thank you!
/**
 * Converts a Telegram update's message (text or caption) into HTML using its
 * entities.
 *
 * Entities are rendered one after another in list order, so nested or
 * overlapping entities are not merged into nested tags. The text and URLs are
 * not HTML-escaped.
 *
 * @param {object} telegramData Update object with a `message` property.
 * @returns {string|undefined} HTML markup, or the unchanged text/caption when
 *   there are no entities.
 */
const parseTelegramMessage = function (telegramData) {
  const text = telegramData.message.text || telegramData.message.caption
  const entities = telegramData.message.entities || telegramData.message.caption_entities

  // An empty entity list must yield the plain text, not an empty string.
  if (!entities || entities.length === 0) {
    return text
  }

  let html = ''

  entities.forEach((entity, index) => {
    const entityEnd = entity.offset + entity.length

    // Characters before the first entity
    if (index === 0) {
      html += text.slice(0, entity.offset)
    }

    // Handle entity transformation
    const entityText = text.slice(entity.offset, entityEnd)
    switch (entity.type) {
      case 'bold':
        html += `<strong>${entityText}</strong>`
        break
      case 'text_link':
        html += `<a href="${entity.url}" target="_blank">${entityText}</a>`
        break
      case 'url':
        html += `<a href="${entityText}" target="_blank">${entityText}</a>`
        break
      case 'italic':
        html += `<em>${entityText}</em>`
        break
      case 'mention':
        html += `<a href="https://t.me/${entityText.replace('@', '')}" target="_blank">${entityText}</a>`
        break
      case 'email':
        html += `<a href="mailto:${entityText}">${entityText}</a>`
        break
      case 'phone_number':
        html += `<a href="tel:${entityText}">${entityText}</a>`
        break
      default:
        html += `${entityText}`
    }

    const next = entities[index + 1]
    if (next) {
      // Characters after this entity but before the next one
      html += text.slice(entityEnd, next.offset)
    } else {
      // Remaining characters after the last entity
      html += text.slice(entityEnd)
    }
  })

  return html
}
Wow, nice! Can someone give me an example of how to convert a message string into Telegram entities in JavaScript?
This is awesome and helped me a lot. Unfortunately, it causes problems with nested entities.
Example: a URL that should be displayed bold. Currently, it's being parsed like this
<a href="...">URL</a><strong>URL</strong>
It should be this
<a href="..."><strong>URL</strong></a>
Any ideas how to improve this?
Rewrote the code, now you can use multiple styles and attach links
/**
 * Converts a Telegram update's message (text or caption) into HTML, allowing
 * several entities to cover the same range (e.g. a bold link).
 *
 * Opening and closing tags are collected per text index and then interleaved
 * with the characters. Entities for which getTag returns no tag (unsupported
 * types) are left as plain text instead of raising an error. The text and
 * URLs are not HTML-escaped.
 *
 * @param {object} telegramData Update object with a `message` property.
 * @returns {string|undefined} HTML markup, or the unchanged text/caption when
 *   the message has no entities.
 */
const parseTelegramMessage = (telegramData) => {
  const text = telegramData.message.text || telegramData.message.caption
  const entities = telegramData.message.entities || telegramData.message.caption_entities

  if (!entities) {
    return text
  }

  // text index -> markup to insert right before the character at that index
  const tags = new Map();

  entities.forEach((entity) => {
    const startTag = getTag(entity, text);
    // Unsupported entity type: nothing to open or close.
    if (!startTag)
      return;

    const closeTag = startTag.indexOf("<a ")===0 ? "</a>" : "</"+startTag.slice(1);
    const start = entity.offset;
    const end = entity.offset+entity.length;

    // Later entities open after earlier ones ...
    tags.set(start, (tags.get(start) || "")+startTag);
    // ... and therefore close before them.
    tags.set(end, closeTag+(tags.get(end) || ""));
  })

  let html = "";
  for (let i = 0; i<text.length; i++){
    html+=(tags.get(i) || "")+text[i];
  }

  // Tags positioned at or past the end of the text (closing tags of entities
  // that run to the end) are appended in index order.
  Array.from(tags.keys())
    .filter(index=>index>=text.length)
    .sort((a, b)=>a-b)
    .forEach(index=>{
      html+=tags.get(index);
    });

  return html;
}
/**
 * Returns the opening HTML tag for a Telegram entity, or undefined when the
 * entity type has no tag associated with it.
 *
 * @param {object} entity Entity with `type`, `offset`, `length` and, for
 *   `text_link`, `url`.
 * @param {string} text Full message text the entity refers to.
 * @returns {string|undefined}
 */
const getTag = (entity, text) => {
  // Portion of the text the entity covers; used as link target for some types.
  const covered = text.slice(entity.offset, entity.offset + entity.length)
  const kind = entity.type

  // Types whose tag does not depend on the entity's content.
  const fixedTags = new Map([
    ['bold', `<strong>`],
    ['italic', `<em>`],
    ['code', `<code>`],
    ['strikethrough', `<s>`],
    ['underline', `<u>`],
    ['pre', `<pre>`]
  ])
  if (fixedTags.has(kind)) {
    return fixedTags.get(kind)
  }

  if (kind === 'text_link') {
    return `<a href="${entity.url}" target="_blank">`
  }
  if (kind === 'url') {
    return `<a href="${covered}" target="_blank">`
  }
  if (kind === 'mention') {
    return `<a href="https://t.me/${covered.replace('@', '')}" target="_blank">`
  }
  if (kind === 'email') {
    return `<a href="mailto:${covered}">`
  }
  if (kind === 'phone_number') {
    return `<a href="tel:${covered}">`
  }
  return undefined
}
Guys, your code gave me the basic idea, and I made my own Python 3 solution. It works with nested entities too.
https://github.com/survtur/TelethonMessageToHtmlConverter
Thanks, @LeonidShastel , used your code and added typescript types to use with grammY
import type { Context } from 'grammy';
import type { MessageEntity } from 'grammy/out/types.node';
/**
 * Converts the text of the current grammY message into HTML using its
 * entities; several entities may cover the same range (e.g. a bold link).
 *
 * Entities for which getTag returns no tag (unsupported types such as bot
 * commands) are left as plain text, so no literal "undefined" ends up in the
 * output. The text and URLs are not HTML-escaped.
 *
 * @param ctx grammY context; only `ctx.msg.text` and `ctx.msg.entities` are read.
 * @returns HTML markup, or the unchanged text (possibly undefined) when the
 *   message has no text or no entities.
 */
export const parseTelegramMessage = (ctx: Context) => {
  const text = ctx.msg?.text;
  const entities = ctx.msg?.entities;

  if (!entities || !text) {
    return text;
  }

  // text index -> markup to insert right before the character at that index
  const tags = new Map<number, string>();

  entities.forEach((entity) => {
    const startTag = getTag(entity, text);
    // Unsupported entity type: nothing to open or close.
    if (!startTag) return;

    const closeTag = startTag.indexOf('<a ') === 0 ? '</a>' : '</' + startTag.slice(1);
    const start = entity.offset;
    const end = entity.offset + entity.length;

    // Later entities open after earlier ones ...
    tags.set(start, (tags.get(start) ?? '') + startTag);
    // ... and therefore close before them.
    tags.set(end, closeTag + (tags.get(end) ?? ''));
  });

  let html = '';
  for (let i = 0; i < text.length; i++) {
    html += (tags.get(i) ?? '') + text[i];
  }

  // Tags positioned at or past the end of the text (closing tags of entities
  // that run to the end) are appended in index order.
  const trailing: number[] = [];
  tags.forEach((_tag, index) => {
    if (index >= text.length) trailing.push(index);
  });
  trailing
    .sort((a, b) => a - b)
    .forEach((index) => {
      html += tags.get(index) ?? '';
    });

  return html;
};
/**
 * Returns the opening HTML tag for a message entity, or undefined when the
 * entity type has no tag associated with it.
 *
 * @param entity Entity describing the formatted range.
 * @param text Full message text the entity refers to.
 */
const getTag = (entity: MessageEntity, text: string) => {
  // Portion of the text the entity covers; used as link target for some types.
  const covered = text.slice(entity.offset, entity.offset + entity.length);

  // Types whose tag does not depend on the entity's content.
  const fixedTags = new Map<string, string>([
    ['bold', `<strong>`],
    ['italic', `<em>`],
    ['code', `<code>`],
    ['strikethrough', `<s>`],
    ['underline', `<u>`],
    ['pre', `<pre>`]
  ]);
  const fixed = fixedTags.get(entity.type);
  if (fixed !== undefined) {
    return fixed;
  }

  if (entity.type === 'text_link') {
    return `<a href="${entity.url}" target="_blank">`;
  }
  if (entity.type === 'url') {
    return `<a href="${covered}" target="_blank">`;
  }
  if (entity.type === 'mention') {
    return `<a href="https://t.me/${covered.replace('@', '')}" target="_blank">`;
  }
  if (entity.type === 'email') {
    return `<a href="mailto:${covered}">`;
  }
  if (entity.type === 'phone_number') {
    return `<a href="tel:${covered}">`;
  }
  return undefined;
};
Function for @mtproto/core, for parsing messages
/**
 * Converts an @mtproto/core message object into HTML using its entities.
 *
 * Entities are rendered one after another in list order, so nested or
 * overlapping entities are not merged into nested tags. The text and URLs are
 * not HTML-escaped.
 *
 * @param {object} msg Message with `message` (or `caption`) text and
 *   `entities` (or `caption_entities`), each entity tagged by its `_` field.
 * @returns {string|undefined} HTML markup, or the unchanged text when there
 *   are no entities.
 */
export const parseTelegramMessage = function (msg) {
  const text = msg.message || msg.caption;
  const entities = msg.entities || msg.caption_entities;

  // An empty entity list must yield the plain text, not an empty string.
  if (!entities || entities.length === 0) {
    return text;
  }

  let html = "";

  entities.forEach((entity, index) => {
    const entityEnd = entity.offset + entity.length;

    // Characters before the first entity
    if (index === 0) {
      html += text.slice(0, entity.offset);
    }

    // Handle entity transformation
    const entityText = text.slice(entity.offset, entityEnd);
    switch (entity._) {
      case "messageEntityBold":
        html += `<strong>${entityText}</strong>`;
        break;
      case "messageEntityPre":
        html += `<pre>${entityText}</pre>`;
        break;
      case "messageEntityCode":
        html += `<code>${entityText}</code>`;
        break;
      case "messageEntityStrike":
        html += `<s>${entityText}</s>`;
        break;
      case "messageEntityUnderline":
        html += `<u>${entityText}</u>`;
        break;
      case "messageEntitySpoiler":
        html += `<span class="tg-spoiler">${entityText}</span>`;
        break;
      case "messageEntityUrl":
        // A plain URL entity carries no `url` field; the covered text is the link.
        html += `<a href="${entityText}" target="_blank">${entityText}</a>`;
        break;
      case "messageEntityTextUrl":
        html += `<a href="${entity.url}" target="_blank">${entityText}</a>`;
        break;
      case "messageEntityItalic":
        html += `<em>${entityText}</em>`;
        break;
      case "messageEntityMention":
        html += `<a href="https://t.me/${entityText.replace(
          "@",
          ""
        )}" target="_blank">${entityText}</a>`;
        break;
      case "messageEntityEmail":
        html += `<a href="mailto:${entityText}">${entityText}</a>`;
        break;
      case "messageEntityPhone":
        html += `<a href="tel:${entityText}">${entityText}</a>`;
        break;
      default:
        html += `${entityText}`;
    }

    const next = entities[index + 1];
    if (next) {
      // Characters after this entity but before the next one
      html += text.slice(entityEnd, next.offset);
    } else {
      // Remaining characters after the last entity
      html += text.slice(entityEnd);
    }
  });

  return html;
};
This won't work when you have nested entities, for instance a bold link.