Skip to content

Instantly share code, notes, and snippets.

@AMD-NICK
Last active December 8, 2024 14:45
Show Gist options
  • Save AMD-NICK/7651b17de852301bbfb68cbab2e21342 to your computer and use it in GitHub Desktop.
Save AMD-NICK/7651b17de852301bbfb68cbab2e21342 to your computer and use it in GitHub Desktop.
Попытка сделать в гмоде корректный срез UTF16. Вогель говорит, что работает, но для энтитей телеги, куда и предназначался, не пашет
--- Encode a UTF-8 string as UTF-16LE (two bytes per code unit, low byte first).
-- Code points above U+FFFF do not fit in one 16-bit unit and are written as a
-- surrogate pair (high unit 0xD800.., then low unit 0xDC00..).
-- Bytes that the lead-byte pattern does not accept are skipped.
-- @param sData string UTF-8 text
-- @return string UTF-16LE bytes; the length is always even
local function utf8to16(sData)
	local out, count = {}, 0

	-- Append one 16-bit code unit in little-endian byte order.
	local function push(unit)
		count = count + 1
		out[count] = string.char(unit % 0x100, math.floor(unit / 0x100))
	end

	for utf8char in string.gmatch(sData, "[%z\x01-\x7F\xC2-\xF4][\x80-\xBF]*") do
		local codepoint = utf8.codepoint(utf8char)
		if codepoint > 0xFFFF then
			-- Supplementary plane: split the 20-bit offset into two 10-bit halves.
			local offset = codepoint - 0x10000
			push(0xD800 + math.floor(offset / 0x400))
			push(0xDC00 + offset % 0x400)
		else
			push(codepoint)
		end
	end

	-- Collect the pieces once instead of growing a string inside the loop.
	return table.concat(out)
end
--- Decode UTF-16LE bytes (low byte first) into a UTF-8 string.
-- A high surrogate immediately followed by a low surrogate is combined into one
-- code point above U+FFFF. An unpaired surrogate is handed to utf8.char as is.
-- @param sData string UTF-16LE bytes
-- @return string UTF-8 text
-- Raises an error when the byte length of sData is odd.
local function utf16to8(sData)
	local len = #sData
	if len % 2 ~= 0 then error("UTF-16 data must have an even number of bytes") end

	local out, count = {}, 0
	local index = 1
	while index <= len do
		local lo, hi = string.byte(sData, index, index + 1)
		local codepoint = lo + hi * 0x100
		index = index + 2

		-- High surrogate with at least one more unit available: try to pair it.
		if codepoint >= 0xD800 and codepoint <= 0xDBFF and index < len then
			local lo2, hi2 = string.byte(sData, index, index + 1)
			local trail = lo2 + hi2 * 0x100
			if trail >= 0xDC00 and trail <= 0xDFFF then
				codepoint = 0x10000 + (codepoint - 0xD800) * 0x400 + (trail - 0xDC00)
				index = index + 2
			end
		end

		count = count + 1
		out[count] = utf8.char(codepoint)
	end

	return table.concat(out)
end
--- Substring of a UTF-8 string addressed in UTF-16 code units.
-- Indices are 1-based and inclusive and follow string.sub conventions: a
-- negative index counts from the end, a start of 0 is treated as 1, and an end
-- past the last unit is clamped.
-- The slice is taken on the utf8to16 output, so a range that cuts through a
-- two-unit character hands an incomplete pair to utf16to8.
-- @param sData string UTF-8 text
-- @param nStart number|nil first code unit (default 1)
-- @param nEnd number|nil last code unit (default: last unit)
-- @return string UTF-8 text of the selected units
local function sub(sData, nStart, nEnd)
	local str = utf8to16(sData)
	local units = math.floor(#str / 2)

	local first = nStart or 1
	local last = nEnd or units

	if first < 0 then
		first = math.max(units + first + 1, 1)
	elseif first == 0 then
		first = 1
	end

	if last < 0 then
		last = units + last + 1
	elseif last > units then
		last = units
	end

	if first > last then
		return ""
	end

	-- Unit k occupies bytes 2k-1 and 2k of the UTF-16 string.
	return utf16to8(str:sub(first * 2 - 1, last * 2))
end
-- Портировал JS код, но потом узнал, что в ЖС с ютф16 и так порядок. Код оказался непригоден
-- utf8To16: function (input) {
-- var _escape = function(s) {
-- function q(c) {
-- c = c.charCodeAt();
-- return '%' + (c<16 ? '0' : '') + c.toString(16).toUpperCase();
-- }
-- return s.replace(/[\x00-),:-?[-^`{-\xFF]/g, q);
-- };
-- try{
-- return decodeURIComponent(_escape(input));
-- }catch (URIError) {
-- //include invalid character, cannot convert
-- return input;
-- }
-- },
-- utf16To8: function (input) {
-- var _unescape = function(s) {
-- function d(x, n) {
-- return String.fromCharCode(parseInt(n, 16));
-- }
-- return s.replace(/%([0-9A-F]{2})/ig, d);
-- };
-- try{
-- return _unescape(encodeURIComponent(input));
-- }catch (URIError) {
-- //include invalid character, cannot convert
-- return input;
-- }
-- },
--- Lua port of the JS `_escape` helper quoted in the comment above.
-- Percent-encodes every byte as %XX (upper-case hex) except the characters the
-- JS character class leaves alone: * + - . / 0-9 @ A-Z _ a-z.
-- The JS original then runs decodeURIComponent on the result; that step is not
-- performed here, so the escaped string itself is returned.
-- @param input string
-- @return string escaped copy of input
-- @return number how many bytes were escaped (second result of gsub)
local function utf8to16(input)
	-- JS class: [\x00-),:-?[-^`{-\xFF]. It is written here as the complementary
	-- negated set, because a %-escape cannot serve as the end point of a range
	-- inside a Lua pattern set.
	return input:gsub("[^*+%-./0-9@A-Z_a-z]", function(c)
		return string.format("%%%02X", string.byte(c))
	end)--:URLDecode()
end
--- Lua port of the JS `_unescape(encodeURIComponent(input))` helper quoted above.
-- URL-encodes the input via its URLEncode method, then turns every %XX escape
-- back into the single byte it names. (Author's note: does this do anything at all?)
-- @param input string
-- @return string result of the substitution
-- @return number how many %XX escapes were replaced (second result of gsub)
local function utf16to8(input)
	-- Convert one two-digit hex capture into the byte with that value.
	local function hexToByte(hexPair)
		local value = tonumber(hexPair, 16)
		return string.char(value)
	end

	return string.gsub(input:URLEncode(), "%%(%x%x)", hexToByte)
end
-- Quick experiment: run a sample string through utf8to16 (the closest preceding
-- definition, i.e. the percent-escaping port) and compare byte lengths.
local input = "🕹 New Item!"
local utf16 = utf8to16(input)
-- local utf8 = utf16to8(utf16)
-- Prints the byte length of the input next to the byte length of the result.
print( #input, #utf16 )
-- print( utf8, input:URLEncode() )
-- Forwards all of its arguments to PRINT (a global that is not defined in this file).
-- NOTE(review): the DHTML page further down calls pushika(...) through a
-- "RUNLUA:" console message; a file-local function is presumably not visible to
-- code executed that way — confirm, and expose it globally if so.
local function pushika(...)
PRINT(...)
end
--- Unfinished stub installed on the string library.
-- Accepts a string, two positions and a PUSH flag, but has no implementation:
-- it returns nothing whether or not PUSH is set.
function string.utf16sub(str, a, b, PUSH)
	if not PUSH then
		return
	end
	-- Nothing is implemented for the PUSH case yet.
end
-- Experiment: let the embedded browser's JavaScript do the substring, since JS
-- strings are indexed in UTF-16 code units, and hand the result back to Lua.
-- A 100x100 DHTML panel that is painted manually and takes no mouse input.
local f = vgui.Create('DHTML')
f:SetSize(100, 100)
f:SetPaintedManually(true)
f:SetMouseInputEnabled(false)
-- Presumably this is what lets the page's "RUNLUA:" console messages run as Lua — confirm.
f:SetAllowLua(true)
-- The page defines sub(input, startl, endl): it takes input.substring(startl, endl)
-- and logs 'RUNLUA:pushika("<result>")'.
-- NOTE(review): the result is spliced into the logged Lua call without escaping.
f:SetHTML([[
<script type='text/JavaScript'>
function sub(input, startl, endl) {
out = input.substring(startl, endl);
console.log('RUNLUA:pushika("'+out+'")');
}
</script>
]])
-- Sample message plus the offset/length of the span to extract.
local text = "🕹 New DMarket Item!";
local offset = 3;
local length = 17;
-- substring() takes a start index and an end index, so the end is offset + length.
local startl = offset;
local endl = startl + length;
-- NOTE(review): text is concatenated into the JS source unescaped; a quote,
-- backslash or newline in it would break the generated call.
f:RunJavascript('sub("' .. text .. '", ' .. startl .. ', ' .. endl .. ');')
-- Remove the helper panel after 5 seconds if it still exists.
timer.Simple(5,function()
if IsValid(f) then
f:Remove()
end
end)
@AMD-NICK
Copy link
Author

AMD-NICK commented Dec 8, 2024

Попытка корректно делать sub для utf16 телеграма

-- -- Аналог PHP unpack('C*', pack('H*', $hash)); и скорее всего implode(unpack("H*", $string));
-- -- И PY "89abc".decode('hex')
-- -- В PHP выручило это: https://gist.github.com/8f235b9dfd1ff1dda1d63c1df77a861e
-- function string.hexify(str) -- a08572
-- 	local arr = {}
-- 	str:gsub("%x%x", function(hh) -- a0 85 72
-- 		arr[#arr + 1] = tonumber(hh,16)
-- 	end)
-- 	return arr -- {160, 133, 114}
-- end


-- Как URLEncode только всех символов
-- #text == #text:toHex():URLDecode()
-- function string:toHex()
-- 	return string.gsub(self, '(.)', function(c)
-- 		return string.format('%%%02X', string.byte(c))
-- 	end)
-- end

-- if 1 == 1 then
-- 	-- local str = "abcXYZ"
-- 	-- print(str:Hexify())

-- 	print(string.toHex("Hello World!"))
-- 	return
-- end


--- Encode a UTF-8 string as UTF-16LE (two bytes per code unit, low byte first).
-- Code points above U+FFFF do not fit in one 16-bit unit and are written as a
-- surrogate pair (high unit 0xD800.., then low unit 0xDC00..).
-- Bytes that the lead-byte pattern does not accept are skipped.
-- @param sData string UTF-8 text
-- @return string UTF-16LE bytes; the length is always even
local function utf8to16(sData)
	local out, count = {}, 0

	-- Append one 16-bit code unit in little-endian byte order.
	local function push(unit)
		count = count + 1
		out[count] = string.char(unit % 0x100, math.floor(unit / 0x100))
	end

	for utf8char in string.gmatch(sData, "[%z\x01-\x7F\xC2-\xF4][\x80-\xBF]*") do
		local codepoint = utf8.codepoint(utf8char)
		if codepoint > 0xFFFF then
			-- Supplementary plane: split the 20-bit offset into two 10-bit halves.
			local offset = codepoint - 0x10000
			push(0xD800 + math.floor(offset / 0x400))
			push(0xDC00 + offset % 0x400)
		else
			push(codepoint)
		end
	end

	-- Collect the pieces once instead of growing a string inside the loop.
	return table.concat(out)
end

--- Decode UTF-16LE bytes (low byte first) into a UTF-8 string.
-- A high surrogate immediately followed by a low surrogate is combined into one
-- code point above U+FFFF. An unpaired surrogate is handed to utf8.char as is.
-- @param sData string UTF-16LE bytes
-- @return string UTF-8 text
-- Raises an error when the byte length of sData is odd.
local function utf16to8(sData)
	local len = #sData
	if len % 2 ~= 0 then error("UTF-16 data must have an even number of bytes") end

	local out, count = {}, 0
	local index = 1
	while index <= len do
		local lo, hi = string.byte(sData, index, index + 1)
		local codepoint = lo + hi * 0x100
		index = index + 2

		-- High surrogate with at least one more unit available: try to pair it.
		if codepoint >= 0xD800 and codepoint <= 0xDBFF and index < len then
			local lo2, hi2 = string.byte(sData, index, index + 1)
			local trail = lo2 + hi2 * 0x100
			if trail >= 0xDC00 and trail <= 0xDFFF then
				codepoint = 0x10000 + (codepoint - 0xD800) * 0x400 + (trail - 0xDC00)
				index = index + 2
			end
		end

		count = count + 1
		out[count] = utf8.char(codepoint)
	end

	return table.concat(out)
end

--- Substring of a UTF-8 string addressed in UTF-16 code units.
-- Indices are 1-based and inclusive and follow string.sub conventions: a
-- negative index counts from the end, a start of 0 is treated as 1, and an end
-- past the last unit is clamped.
-- The slice is taken on the utf8to16 output, so a range that cuts through a
-- two-unit character hands an incomplete pair to utf16to8.
-- @param sData string UTF-8 text
-- @param nStart number|nil first code unit (default 1)
-- @param nEnd number|nil last code unit (default: last unit)
-- @return string UTF-8 text of the selected units
local function sub(sData, nStart, nEnd)
	local str = utf8to16(sData)
	local units = math.floor(#str / 2)

	local first = nStart or 1
	local last = nEnd or units

	if first < 0 then
		first = math.max(units + first + 1, 1)
	elseif first == 0 then
		first = 1
	end

	if last < 0 then
		last = units + last + 1
	elseif last > units then
		last = units
	end

	if first > last then
		return ""
	end

	-- Unit k occupies bytes 2k-1 and 2k of the UTF-16 string.
	return utf16to8(str:sub(first * 2 - 1, last * 2))
end

-- Debug gate: the condition is always true, so this demo runs and the chunk
-- returns at the end of it; nothing below this block is executed.
-- Each call prints sub(text, offset, offset + length) for one entity of the
-- sample message; the trailing comment gives the entity type and the text it
-- is meant to cover.
-- NOTE(review): sub() takes a 1-based inclusive range, so (offset, offset + length)
-- spans length + 1 units; formatEnt below uses (offset + 1, offset + length) — confirm
-- which one is intended.
if 1 == 1 then
	-- local text = "🕹 New DMarket Item! AK-47 | Asiimov (Field-Tested) 💵 Price:   42 🛒 (02)Act: 0, 14% 🛍 (07)Pre: 12, 2, 12, 10, 5%  Profitable than: 5 latest closed ██████████████████░░ (of all)  🖼 Иконка | Steam"
	local text = "🕹 New DMarket Item!\nAK-47 | Asiimov (Field-Tested)\n💵 Price:   42\n🛒 (02)Act: 0, 14%\n🛍 (07)Pre: 12, 2, 12, 10, 5%\n\nProfitable than: 5 latest closed\n██████████████████░░ (of all)\n\n🖼 Иконка | Steam"
	-- local text = "1234567890"

	-- text = text:gsub("\n", "  ")
	-- print(text)

	print(sub(text, 3, 3 + 17))     -- bold       New DMarket Item
	print(sub(text, 21, 21 + 30))   -- text_link  AK-47 | Asiimov (Field-Tested)
	print(sub(text, 55, 55 + 8))    -- code       price:
	print(sub(text, 70, 70 + 8))    -- code       Act:
	print(sub(text, 89, 89 + 8))    -- code       Pre:
	print(sub(text, 150, 150 + 20)) -- code       [bar]
	print(sub(text, 184, 184 + 6))  -- text_link  Иконка
	print(sub(text, 193, 193 + 5))  -- text_link  Steam

	return
end



-- string.format templates keyed by entity type. "text_link" takes the text and
-- then the URL; the other two take only the text.
local patterns = {
	bold      = "*%s*",
	text_link = "[%s](%s)",
	code      = "`%s`",
}

--- Slice a UTF-8 string by UTF-16 code-unit positions without converting it.
-- Positions are 1-based and inclusive. A character encoded in 4 UTF-8 bytes
-- counts as two units (a surrogate pair); every other character counts as one.
-- A character is included whole when any of its units falls inside the range.
-- @param txt string UTF-8 text
-- @param first number first code unit
-- @param last number|nil last code unit (default: through the end of txt)
-- @return string the selected part of txt, "" when the range is empty
local function utf16Slice(txt, first, last)
	if last and last < first then
		return ""
	end

	local len = #txt
	local pos, unit = 1, 0
	local startByte

	while pos <= len do
		local lead = string.byte(txt, pos)
		-- Sequence length and UTF-16 width follow from the lead byte; a stray
		-- continuation byte is stepped over as a single one-unit character.
		local size, width = 1, 1
		if lead >= 0xF0 then
			size, width = 4, 2
		elseif lead >= 0xE0 then
			size = 3
		elseif lead >= 0xC0 then
			size = 2
		end

		unit = unit + width
		if not startByte and unit >= first then
			startByte = pos
		end
		if last and unit >= last then
			return string.sub(txt, startByte, pos + size - 1)
		end
		pos = pos + size
	end

	return startByte and string.sub(txt, startByte) or ""
end

--- Turn one message entity into its formatted string.
-- ent.offset (0-based) and ent.length are taken as UTF-16 code units, which is
-- how the Telegram Bot API reports them, and are applied to txt directly.
-- @param ent table entity with type, offset, length and, for links, url
-- @param txt string full message text (UTF-8)
-- @return string the entity text wrapped in the template for ent.type, or the
--   bare entity text when the type has no template
local function formatEnt(ent, txt)
	local slice = utf16Slice(txt, ent.offset + 1, ent.offset + ent.length) -- e.g. M4A4 | Hellfire (Field-Tested)

	local pattern = patterns[ent.type]
	if not pattern then -- entity type is not supported
		return slice
	end

	return pattern:format(slice, ent.url) -- e.g. [name](url)
end

-- Appends a formatted ent to txt
-- local function concatEnt(ent, txt)

-- end

--- Rebuild a message with every entity replaced by its formatted form.
-- The raw text before, between and after the entities is copied through
-- unchanged. Entities are processed in the order given.
-- NOTE(review): assumes tEntities is sorted by offset and not nested; an entity
-- that starts inside an earlier one is emitted again in full — confirm.
-- @param tEntities table array of entities
-- @param txt string full message text (UTF-8)
-- @return string formatted message
local function applyEntities(tEntities, txt)
	if #tEntities == 0 then
		return txt
	end

	local parts, count = {}, 0
	local cursor = 0 -- number of UTF-16 units of txt already emitted

	for _, ent in ipairs(tEntities) do
		if ent.offset > cursor then
			-- Raw text between the previous entity (or the start) and this one.
			count = count + 1
			parts[count] = utf16Slice(txt, cursor + 1, ent.offset)
		end
		count = count + 1
		parts[count] = formatEnt(ent, txt)
		cursor = math.max(cursor, ent.offset + ent.length)
	end

	-- Raw remainder after the last entity.
	count = count + 1
	parts[count] = utf16Slice(txt, cursor + 1)

	return table.concat(parts)
end

-- Sample message and its entity list, used to exercise the formatter by hand.
local text = "🕹 New DMarket Item!\nAK-47 | Asiimov (Field-Tested)\n💵 Price:   42\n🛒 (02)Act: 0, 14%\n🛍 (07)Pre: 12, 2, 12, 10, 5%\n\nProfitable than: 5 latest closed\n██████████████████░░ (of all)\n\n🖼 Иконка | Steam"
-- Each entry has a type, an offset and a length; text_link entries also carry
-- a url. The entries are listed in ascending offset order.
local ents = {
	{
		['type']   = 'bold',
		['length'] = 17,
		['offset'] = 3,
	},
	{
		['type']   = 'text_link',
		['length'] = 30,
		['url']    = 'https://dmarket.com/ru/ingame-items/product-card/ak-47-asiimov-field-tested',
		['offset'] = 21,
	},
	{
		['type']   = 'code',
		['length'] = 8,
		['offset'] = 55,
	},
	{
		['type']   = 'code',
		['length'] = 8,
		['offset'] = 70,
	},
	{
		['type']   = 'code',
		['length'] = 8,
		['offset'] = 89,
	},
	{
		['type']   = 'code',
		['length'] = 20,
		['offset'] = 150,
	},
	{
		['type']   = 'text_link',
		['length'] = 6,
		['url']    = 'https://steamcommunity-a.akamaihd.net/economy/image/-9a81dlWLwJ2UUGcVs_nsVtzdOEdtWwKGZZLQHTxDZ7I56KU0Zwwo4NUX4oFJZEHLbXH5ApeO4YmlhxYQknCRvCo04DEVlxkKgpot7HxfDhjxszJemkV092lnYmGmOHLPr7Vn35cppQiiOuQpoml3wW18xdkNTjxd9CQdwM_ZlrT-lW_kLzu0560vp-azXJ9-n51Q5-Fea0',
		['offset'] = 184,
	},
	{
		['type']   = 'text_link',
		['length'] = 5,
		['url']    = 'https://steamcommunity.com/profiles/76561198332732966/inventory/#730_2_15744095683',
		['offset'] = 193,
	},
}

-- Format only the sixth entity (the 20-unit 'code' entry at offset 150) and print it.
print(formatEnt(ents[6], text))

-- print(applyEntities(ents, text))

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment