Skip to content

Instantly share code, notes, and snippets.

@chuv1
Last active November 18, 2023 09:37
Show Gist options
  • Save chuv1/9641abc8a5a1a9b3bb8c9177fb7ffa9e to your computer and use it in GitHub Desktop.
Save chuv1/9641abc8a5a1a9b3bb8c9177fb7ffa9e to your computer and use it in GitHub Desktop.
Test function to convert telegram message entities into HTML markup.
/**
 * Render the message text as HTML, wrapping each formatting entity in its tag.
 *
 * Entity types without markup (mention, hashtag, cashtag, bot_command, url,
 * email, phone_number and unknown types) are emitted as plain escaped text.
 * Text outside and between entities is preserved, nested entities are nested
 * in the output, and all text and attribute values are HTML-escaped.
 *
 * @param bool $without_cmd Forwarded to getText() only when the message has
 *                          no entities (same as before); with entities the
 *                          full text is used so the offsets stay valid.
 * @return string|null Escaped HTML, or whatever empty value getText() yields.
 */
public function getHTML($without_cmd = false){
    $escape = static function($value){
        return htmlspecialchars((string)$value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    };

    $entities = $this->getEntities();
    if(empty($entities)){
        $plain = $this->getText($without_cmd);
        return $plain === null ? $plain : $escape($plain);
    }

    // Telegram expresses offset/length in UTF-16 code units, so slice a
    // UTF-16LE copy (2 bytes per unit) rather than counting code points.
    $utf16 = mb_convert_encoding((string)$this->getText(), 'UTF-16LE', 'UTF-8');
    $total = (int)(strlen($utf16) / 2);
    $slice = static function($from, $to) use ($utf16, $escape){
        if($to <= $from){
            return '';
        }
        $chunk = substr($utf16, $from * 2, ($to - $from) * 2);
        return $escape(mb_convert_encoding($chunk, 'UTF-8', 'UTF-16LE'));
    };

    // Markup to insert at each UTF-16 position, keyed by position.
    $opening = [];
    $closing = [];
    foreach($entities as $entity){
        $start = max(0, min($total, $entity->getOffset()));
        $end = min($total, $entity->getOffset() + $entity->getLength());
        if($end <= $start){
            continue;
        }

        $open = null;
        $close = null;
        switch($entity->getType()){
            case 'text_mention':
                $open = '<a href="tg://user?id='.$escape($entity->getUser()->getId()).'">';
                $close = '</a>';
                break;
            case 'text_link':
                $open = '<a href="'.$escape($entity->getUrl()).'">';
                $close = '</a>';
                break;
            case 'bold':
                $open = '<b>';
                $close = '</b>';
                break;
            case 'italic':
                $open = '<i>';
                $close = '</i>';
                break;
            case 'code':
                $open = '<code>';
                $close = '</code>';
                break;
            case 'pre':
                $open = '<pre>';
                $close = '</pre>';
                break;
            default:
                // No markup for this type: its text is emitted as plain text.
                break;
        }
        if($open === null){
            continue;
        }

        // Opening tags keep entity order; closing tags are prepended so the
        // most recently opened entity is closed first.
        $opening[$start] = (isset($opening[$start]) ? $opening[$start] : '').$open;
        $closing[$end] = $close.(isset($closing[$end]) ? $closing[$end] : '');
    }

    $points = array_unique(array_merge(array_keys($opening), array_keys($closing)));
    sort($points);

    $html = '';
    $cursor = 0;
    foreach($points as $point){
        $html .= $slice($cursor, $point);
        // Close finished entities before opening the ones that start here.
        $html .= (isset($closing[$point]) ? $closing[$point] : '')
            .(isset($opening[$point]) ? $opening[$point] : '');
        $cursor = $point;
    }
    return $html.$slice($cursor, $total);
}
@ak4zh
Copy link

ak4zh commented Dec 19, 2022

Thanks, @LeonidShastel. I used your code and added TypeScript types so it can be used with grammY.

import type { Context } from 'grammy';
import type { MessageEntity } from 'grammy/out/types.node';

/**
 * Render the text of the current message as HTML using its formatting entities.
 *
 * Each entity that `getTag` maps to an opening tag is wrapped in that tag;
 * entities without a mapping are left as plain text. Message text is
 * HTML-escaped, so the result can be inserted into markup as-is.
 *
 * @param ctx grammY context whose `msg.text` / `msg.entities` are rendered.
 * @returns The HTML string, or the original (empty/undefined) text when the
 *          message has no text.
 */
export const parseTelegramMessage = (ctx: Context) => {
	const text = ctx.msg?.text;
	const entities = ctx.msg?.entities;

	const escapeText = (raw: string): string =>
		raw.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');

	if (!text) {
		return text;
	}
	if (!entities || entities.length === 0) {
		return escapeText(text);
	}

	// Markup to insert at each text offset. Closing tags are always emitted
	// before opening tags that share the same offset.
	const markers = new Map<number, { closing: string; opening: string }>();
	const markerAt = (index: number) => {
		let marker = markers.get(index);
		if (!marker) {
			marker = { closing: '', opening: '' };
			markers.set(index, marker);
		}
		return marker;
	};

	for (const entity of entities) {
		const start = Math.max(0, Math.min(entity.offset, text.length));
		const end = Math.min(entity.offset + entity.length, text.length);
		if (end <= start) continue;

		const startTag = getTag(entity, text);
		// Derive the closing tag from the element name so attributes on the
		// opening tag never leak into it; skip entities with no markup.
		const tagName = startTag ? /^<([A-Za-z][\w-]*)/.exec(startTag)?.[1] : undefined;
		if (!startTag || !tagName) continue;

		markerAt(start).opening += startTag;
		// Prepend so the most recently opened entity is closed first.
		const endMarker = markerAt(end);
		endMarker.closing = `</${tagName}>` + endMarker.closing;
	}

	const positions = [...markers.keys()].sort((a, b) => a - b);
	let html = '';
	let cursor = 0;
	for (const position of positions) {
		html += escapeText(text.slice(cursor, position));
		const marker = markers.get(position);
		if (marker) html += marker.closing + marker.opening;
		cursor = position;
	}
	html += escapeText(text.slice(cursor));

	return html;
};

/**
 * Build the opening HTML tag for a Telegram message entity.
 *
 * Link-like entities produce an `<a ...>` tag whose `href` is derived from the
 * entity (its `url`, or the text it covers); that value is attribute-escaped
 * so it cannot terminate the attribute or inject markup.
 *
 * @param entity The entity to render.
 * @param text Full message text that `entity.offset` / `entity.length` index into.
 * @returns The opening tag, or `undefined` for entity types with no markup.
 */
const getTag = (entity: MessageEntity, text: string): string | undefined => {
	const escapeAttribute = (value: string): string =>
		value
			.replace(/&/g, '&amp;')
			.replace(/"/g, '&quot;')
			.replace(/</g, '&lt;')
			.replace(/>/g, '&gt;');

	const entityText = text.slice(entity.offset, entity.offset + entity.length);

	switch (entity.type) {
		case 'bold':
			return '<strong>';
		case 'italic':
			return '<em>';
		case 'underline':
			return '<u>';
		case 'strikethrough':
			return '<s>';
		case 'code':
			return '<code>';
		case 'pre':
			return '<pre>';
		case 'text_link':
			return `<a href="${escapeAttribute(entity.url)}" target="_blank">`;
		case 'url':
			return `<a href="${escapeAttribute(entityText)}" target="_blank">`;
		case 'mention':
			// Only the leading "@" is dropped to form the t.me username path.
			return `<a href="https://t.me/${escapeAttribute(entityText.replace('@', ''))}" target="_blank">`;
		case 'email':
			return `<a href="mailto:${escapeAttribute(entityText)}">`;
		case 'phone_number':
			return `<a href="tel:${escapeAttribute(entityText)}">`;
		default:
			return undefined;
	}
};

@kvalood
Copy link

kvalood commented Jul 19, 2023

Function for @mtproto/core, for parsing messages

/**
 * Render an @mtproto/core message (or caption) as HTML using its entities.
 *
 * Reads `msg.message` / `msg.entities`, falling back to `msg.caption` /
 * `msg.caption_entities`. Entities with a known `_` type are wrapped in the
 * matching tag; unknown types are emitted as plain text. Nested entities are
 * nested in the output, and all text and attribute values are HTML-escaped.
 *
 * @param msg Message object to render.
 * @returns The HTML string, or the original (empty/undefined) text when the
 *          message has no text.
 */
export const parseTelegramMessage = function (msg) {
	const text = msg.message || msg.caption;
	const entities = msg.entities || msg.caption_entities;

	const escapeHtml = (raw) =>
		String(raw)
			.replace(/&/g, "&amp;")
			.replace(/</g, "&lt;")
			.replace(/>/g, "&gt;")
			.replace(/"/g, "&quot;");

	if (!text) {
		return text;
	}
	if (!entities || entities.length === 0) {
		return escapeHtml(text);
	}

	// Returns [openingTag, closingTag] for an entity, or null when the entity
	// type has no markup.
	const wrapperFor = (entity) => {
		const entityText = text.slice(
			entity.offset,
			entity.offset + entity.length
		);

		switch (entity._) {
			case "messageEntityBold":
				return ["<strong>", "</strong>"];
			case "messageEntityItalic":
				return ["<em>", "</em>"];
			case "messageEntityUnderline":
				return ["<u>", "</u>"];
			case "messageEntityStrike":
				return ["<s>", "</s>"];
			case "messageEntityCode":
				return ["<code>", "</code>"];
			case "messageEntityPre":
				return ["<pre>", "</pre>"];
			case "messageEntitySpoiler":
				return [`<span class="tg-spoiler">`, "</span>"];
			case "messageEntityUrl":
				// A plain URL entity carries no `url` field; the covered text is the link.
				return [
					`<a href="${escapeHtml(entityText)}" target="_blank">`,
					"</a>",
				];
			case "messageEntityTextUrl":
				return [
					`<a href="${escapeHtml(entity.url)}" target="_blank">`,
					"</a>",
				];
			case "messageEntityMention":
				return [
					`<a href="https://t.me/${escapeHtml(
						entityText.replace("@", "")
					)}" target="_blank">`,
					"</a>",
				];
			case "messageEntityEmail":
				return [`<a href="mailto:${escapeHtml(entityText)}">`, "</a>"];
			case "messageEntityPhone":
				return [`<a href="tel:${escapeHtml(entityText)}">`, "</a>"];
			default:
				return null;
		}
	};

	// Markup to insert at each text offset, keyed by offset.
	const opening = new Map();
	const closing = new Map();

	entities.forEach((entity) => {
		const start = Math.max(0, Math.min(entity.offset, text.length));
		const end = Math.min(entity.offset + entity.length, text.length);
		if (end <= start) {
			return;
		}

		const wrapper = wrapperFor(entity);
		if (!wrapper) {
			return;
		}

		opening.set(start, (opening.get(start) || "") + wrapper[0]);
		// Prepend so the most recently opened entity is closed first.
		closing.set(end, wrapper[1] + (closing.get(end) || ""));
	});

	const boundaries = [
		...new Set([...opening.keys(), ...closing.keys()]),
	].sort((a, b) => a - b);

	let html = "";
	let cursor = 0;

	boundaries.forEach((boundary) => {
		html += escapeHtml(text.slice(cursor, boundary));
		// Close finished entities before opening the ones that start here.
		html += (closing.get(boundary) || "") + (opening.get(boundary) || "");
		cursor = boundary;
	});

	// Remaining characters after the last entity boundary
	html += escapeHtml(text.slice(cursor));

	return html;
};

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment