Last active
March 26, 2019 17:01
-
-
Save t510599/c4fe5949defc39110b6a7917915f8878 to your computer and use it in GitHub Desktop.
FB 下載回來的資訊有夠坑人,好好的 utf-8 byte 被編成 \u00??,想解讀聊天紀錄都很不方便。 https://blog.stoneapp.tech/post.php?pid=139
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Repairs the text of a Facebook Messenger export. Requires ES6 and Node.js.
 *
 * Reads ./message.json (next to this script), passes the top-level object and
 * every entry of its "messages" array through decodeJson(), and writes the
 * pretty-printed result to ./output.json (also next to this script).
 */
const fs = require('fs');
const path = require('path');

/*
 * Example of the entries found under "messages" in message.json. The first
 * "content" shows the problem being repaired: each \u00XX escape holds one
 * byte of the UTF-8 encoding of the real text.
 *
 * [
 *   {
 *     "sender_name": "Tony Yang",
 *     "timestamp": 1526469371,
 *     "content": "\u00e5\u00a6\u0082\u00e6\u009e\u009c\u00e6\u009c\u0089\u00e7\u009c\u008b\u00e5\u0088\u00b0\u00e7\u009a\u0084\u00e8\u00a9\u00b1\u00e5\u00b9\u00ab\u00e6\u0088\u0091\u00e5\u00a1\u00ab\u00e4\u00b8\u0080\u00e4\u00b8\u008b \u00e6\u0084\u009f\u00e8\u00ac\u009d",
 *     "type": "Generic"
 *   },
 *   {
 *     "sender_name": "Tony Yang",
 *     "timestamp": 1526469357,
 *     "content": "https://www.surveycake.com/s/wb7mP",
 *     "share": {
 *       "link": "https://www.surveycake.com/s/wb7mP"
 *     },
 *     "type": "Share"
 *   }
 * ]
 */
const inputPath = path.resolve(__dirname, "./message.json");
const outputPath = path.resolve(__dirname, "./output.json");

const json = JSON.parse(fs.readFileSync(inputPath, 'utf8'));

// Fail with a clear message instead of an opaque "not iterable" error when the
// input is not a message export.
if (!Array.isArray(json.messages)) {
  throw new TypeError('message.json does not contain a "messages" array');
}

// decodeJson() only repairs the text fields directly on the object it is
// given, so the top-level object and each message are decoded separately.
const decoded = decodeJson(json);
decoded.messages = json.messages.map((message) => decodeJson(message));

fs.writeFileSync(outputPath, JSON.stringify(decoded, null, 2));
/**
 * Repairs the mis-encoded text fields of one object from the export.
 *
 * Only the keys "sender_name", "content", "title" and "participants" are
 * touched; every other property is left exactly as it is. The object is
 * modified in place and also returned.
 *
 * @param {Object} encoded - A message object or the top-level export object.
 *   Anything that is not an object is returned unchanged.
 * @returns {Object} The same `encoded` object with its text fields decoded.
 */
function decodeJson(encoded) {
  if (encoded === null || typeof encoded !== 'object') {
    return encoded;
  }

  const hasOwn = (key) => Object.prototype.hasOwnProperty.call(encoded, key);

  // Plain text fields. Non-string values are left alone rather than being
  // coerced to strings. The decoded text is stored as-is: it is deliberately
  // not passed through unescape(), which would rewrite any literal %XX in a
  // message (for example inside a percent-encoded URL).
  for (const key of ['sender_name', 'content', 'title']) {
    if (hasOwn(key) && typeof encoded[key] === 'string') {
      encoded[key] = decode(encoded[key]);
    }
  }

  if (hasOwn('participants') && Array.isArray(encoded.participants)) {
    encoded.participants = encoded.participants.map((person) => {
      if (typeof person === 'string') {
        return decode(person);
      }
      // NOTE(review): some exports appear to list participants as objects
      // with a "name" field instead of bare strings - confirm against your
      // export. Such entries keep their shape; only the name is decoded.
      if (person !== null && typeof person === 'object' && typeof person.name === 'string') {
        return Object.assign({}, person, { name: decode(person.name) });
      }
      return person;
    });
  }

  return encoded;
}
/**
 * Restores text whose UTF-8 bytes were each stored as a separate character.
 *
 * Every run of characters in the range U+0080..U+00FF is treated as a
 * sequence of raw bytes and decoded as UTF-8. All other characters - ASCII
 * (including tab, CR and LF) and anything above U+00FF - are already correct
 * and pass through unchanged. A run that is not valid UTF-8 is left exactly
 * as it was, so text that is already correct is never damaged.
 *
 * @param {string} encodedTxt - The mis-encoded text (coerced with String()).
 * @returns {string} The repaired text.
 */
function decode(encodedTxt) {
  return String(encodedTxt).replace(/[\u0080-\u00ff]+/g, (run) => {
    // 'latin1' maps each character code 0x00..0xFF to the byte of that value.
    const bytes = Buffer.from(run, 'latin1');
    const text = bytes.toString('utf8');
    // Buffer replaces invalid UTF-8 with U+FFFD, which does not encode back
    // to the original bytes; a clean round trip therefore proves validity.
    return Buffer.from(text, 'utf8').equals(bytes) ? text : run;
  });
}
/**
 * Combines the bytes of one UTF-8 sequence into the character it encodes.
 *
 * @param {string[]} array - One single-character string per byte, lead byte
 *   first; the char code of each string is used as the byte value. Entries
 *   beyond `len` are ignored.
 * @param {number} len - Length of the sequence: 2, 3 or 4. For any other
 *   value the result is the character U+0000.
 * @returns {string} The decoded character.
 * @throws {RangeError} From String.fromCodePoint when the combined value is
 *   above U+10FFFF.
 */
function utf8ToText(array, len) {
  // Payload bits carried by the lead byte of a 2-, 3- and 4-byte sequence.
  // A Map keeps the lookup strict: only the numbers 2, 3 and 4 match.
  const leadMasks = new Map([[2, 0x1f], [3, 0x0f], [4, 0x07]]);
  const leadMask = leadMasks.get(len);
  if (leadMask === undefined) {
    return String.fromCodePoint(0);
  }

  let codePoint = 0;
  const count = Math.min(array.length, len);
  for (let i = 0; i < count; i++) {
    const byte = array[i].charCodeAt(0);
    // Continuation bytes always carry six payload bits.
    const payload = i === 0 ? byte & leadMask : byte & 0x3f;
    // The first byte holds the most significant bits, the last byte the least.
    codePoint |= payload << (6 * (len - 1 - i));
  }
  return String.fromCodePoint(codePoint);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment