Skip to content

Instantly share code, notes, and snippets.

@t510599
Last active March 26, 2019 17:01
Show Gist options
  • Save t510599/c4fe5949defc39110b6a7917915f8878 to your computer and use it in GitHub Desktop.
Save t510599/c4fe5949defc39110b6a7917915f8878 to your computer and use it in GitHub Desktop.
FB 下載回來的資訊有夠坑人,好好的 utf-8 byte 被編成 \u00??,想解讀聊天紀錄都很不方便。 https://blog.stoneapp.tech/post.php?pid=139
/* Requires ES6 (e.g. for...of, String.fromCodePoint). */
var fs = require('fs');
var path = require('path');
// Script entry point: read ./message.json (next to this script), repair the
// text fields of the conversation and of every message, and write the result
// to ./output.json.
const json = JSON.parse(fs.readFileSync(path.resolve(__dirname, "./message.json"), 'utf8'));
/* Example of the entries expected in json["messages"]:
[
{
"sender_name": "Tony Yang",
"timestamp": 1526469371,
"content": "\u00e5\u00a6\u0082\u00e6\u009e\u009c\u00e6\u009c\u0089\u00e7\u009c\u008b\u00e5\u0088\u00b0\u00e7\u009a\u0084\u00e8\u00a9\u00b1\u00e5\u00b9\u00ab\u00e6\u0088\u0091\u00e5\u00a1\u00ab\u00e4\u00b8\u0080\u00e4\u00b8\u008b \u00e6\u0084\u009f\u00e8\u00ac\u009d",
"type": "Generic"
},
{
"sender_name": "Tony Yang",
"timestamp": 1526469357,
"content": "https://www.surveycake.com/s/wb7mP",
"share": {
"link": "https://www.surveycake.com/s/wb7mP"
},
"type": "Share"
}];
*/
const messages = json["messages"];
if (!Array.isArray(messages)) {
    // Fail with an explicit message instead of an opaque "not iterable" error.
    throw new TypeError('message.json does not contain a "messages" array');
}
const result = [];
// `const msg` keeps the loop variable local; an undeclared `msg` would become
// an implicit global and is a ReferenceError in strict mode.
for (const msg of messages) {
    result.push(decodeJson(msg));
}
// Top-level fields (e.g. "title", "participants") are repaired separately from
// the per-message fields; the "messages" key itself is skipped by decodeJson.
const decodedJson = decodeJson(json);
decodedJson["messages"] = result;
fs.writeFileSync(path.resolve(__dirname, "./output.json"), JSON.stringify(decodedJson, null, 2));
/**
 * Repair the human-readable text fields of one object from the export.
 *
 * Only the keys "sender_name", "content", "participants" and "title" are
 * touched; every other key is left exactly as it is. The object is modified
 * in place and also returned.
 *
 * @param {Object} encoded - A message object or the top-level conversation object.
 * @returns {Object} The same object, with its text fields passed through decode().
 */
function decodeJson(encoded) {
    // Nothing to iterate on primitives / null; hand the value back untouched.
    if (encoded === null || typeof encoded !== "object") {
        return encoded;
    }

    const TEXT_KEYS = ["sender_name", "content", "participants", "title"];

    // NOTE: the result of decode() is deliberately NOT passed through the
    // deprecated unescape(): that would rewrite literal "%XX" sequences
    // (common in shared URLs) and corrupt the message text.
    const decodeText = function (value) {
        return decode(String(value));
    };

    // A participant is normally a plain name string. Some exports appear to use
    // objects with a "name" field instead (assumption - verify against your
    // data); decode the name rather than coercing the object to "[object Object]".
    const decodeParticipant = function (person) {
        if (person !== null && typeof person === "object" && typeof person.name === "string") {
            return Object.assign({}, person, { name: decode(person.name) });
        }
        return decodeText(person);
    };

    // Object.keys() restricts the walk to the object's own properties.
    for (const key of Object.keys(encoded)) {
        if (TEXT_KEYS.indexOf(key) === -1) {
            continue;
        }
        const value = encoded[key];
        if (key === "participants" && Array.isArray(value)) {
            encoded[key] = value.map(decodeParticipant);
        } else {
            encoded[key] = decodeText(value);
        }
    }
    return encoded;
}
/**
 * Turn text whose UTF-8 bytes were each stored as a separate character
 * (U+0080..U+00FF, i.e. "\u00XX" in the JSON file) back into real text.
 *
 * ASCII characters (including tab, CR, LF and other control characters) and
 * characters above U+00FF are never part of such a byte run and are copied
 * through unchanged. A run that is not well-formed UTF-8 (for example a
 * genuine Latin-1 letter in already-correct text, or a truncated sequence)
 * is also left unchanged instead of being dropped.
 *
 * @param {string} encodedTxt - Text that may contain byte-per-character UTF-8.
 * @returns {string} The repaired text.
 */
function decode(encodedTxt) {
    // fatal: true     -> throw on malformed input instead of inserting U+FFFD.
    // ignoreBOM: true -> keep an encoded U+FEFF in the output instead of stripping it.
    const utf8Decoder = new TextDecoder("utf-8", { fatal: true, ignoreBOM: true });

    // Every byte of a multi-byte UTF-8 sequence is >= 0x80, so each maximal run
    // of characters in U+0080..U+00FF is a self-contained candidate byte string.
    return encodedTxt.replace(/[\u0080-\u00ff]+/g, function (run) {
        const bytes = Uint8Array.from(run, function (ch) {
            return ch.charCodeAt(0);
        });
        try {
            return utf8Decoder.decode(bytes);
        } catch (err) {
            // Not valid UTF-8: keep the original characters rather than lose data.
            return run;
        }
    });
}
/**
 * Assemble one character from the bytes of a single UTF-8 sequence.
 *
 * Each element of `array` is a one-character string whose char code is the
 * byte value. The lead byte contributes its low 5/4/3 bits (for a 2/3/4-byte
 * sequence) and every following byte contributes its low 6 bits.
 *
 * @param {string[]} array - The bytes of the sequence, lead byte first.
 * @param {number} len - Sequence length in bytes: 2, 3 or 4.
 * @returns {string} The decoded character. For any other `len` no byte is
 *     consumed and "\u0000" is returned.
 * @throws {RangeError} From String.fromCodePoint if the assembled value is
 *     greater than 0x10FFFF.
 */
function utf8ToText(array, len) {
    // Payload mask of the lead byte for each supported sequence length.
    const LEAD_PAYLOAD_MASKS = new Map([[2, 0x1f], [3, 0x0f], [4, 0x07]]);
    const CONTINUATION_PAYLOAD_MASK = 0x3f;

    const leadMask = LEAD_PAYLOAD_MASKS.get(len);
    let charCode = 0;
    if (leadMask !== undefined) {
        const count = Math.min(array.length, len);
        for (let index = 0; index < count; index++) {
            const charByte = array[index].charCodeAt(0);
            // Masking (rather than XOR-ing the expected prefix away) guarantees
            // that a malformed byte cannot spill bits into a neighbouring field.
            const mask = index === 0 ? leadMask : CONTINUATION_PAYLOAD_MASK;
            // The first byte holds the most significant bits; each later byte
            // sits 6 bits lower.
            charCode |= (charByte & mask) << (6 * (len - 1 - index));
        }
    }
    return String.fromCodePoint(charCode);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment