Scrapes emojis from Unicode's full emoji list; works as of 2022-04-14.
Designed for Node, though with a little adaptation it will probably work in browsers.
Requires: the `node-html-parser` and `axios` npm packages.
Scrapes emojis from Unicode's full emoji list; works as of 2022-04-14.
Designed for Node, though with a little adaptation it will probably work in browsers.
Requires: the `node-html-parser` and `axios` npm packages.
// eslint-disable-next-line import/no-extraneous-dependencies
import { parse, HTMLElement } from 'node-html-parser';
import { existsSync, mkdirSync, writeFile } from 'fs';
import axios from 'axios';
/** Zero-based index of the table cell whose HTML is used as the emoji character. */
const EMOJI_CHARACTER_CELL_INDEX = 2;
/** URL of the page that is downloaded and scraped for the emoji table. */
const UNICODE_FULL_EMOJI_LIST = 'https://unicode.org/emoji/charts/full-emoji-list.html';
/**
 * Vendor image sets that can be scraped from the emoji table.
 *
 * Each member's numeric value is the zero-based index of the `<td>` cell within a
 * table row that is read for that vendor's image, so the values must stay numeric
 * and must match the column layout of the scraped page.
 */
enum EmojiVariant {
  APPLE = 3,
  GOOGLE = 4,
  FACEBOOK = 5,
  WINDOWS = 6,
  TWEMOJI = 7,
  JOYPIXELS = 8,
  SAMSUNG = 9,
  GMAIL = 10,
  SOFTBANK = 11,
  DOCOMO = 12,
  KDDI = 13,
}
/**
 * Extracts a single `{ emoji: imageSrc }` pair from one row of the emoji table.
 *
 * @param tableRow - A `<tr>` element taken from the emoji table.
 * @param variant - Which vendor column (cell index) to read the image from.
 * @returns An object with exactly one entry, mapping the HTML of the emoji character
 *   cell to the `src` attribute of the `<img>` in the variant's cell; or `null` when
 *   the row does not reach the requested column, the variant cell is the `—`
 *   placeholder, or either the emoji or the image source is missing/empty.
 */
async function parseTableRow(tableRow: HTMLElement, variant: EmojiVariant): Promise<{ [key: string]: string; } | null> {
  const cells = tableRow.querySelectorAll('td');
  const variantCell = cells[variant];
  const emojiCell = cells[EMOJI_CHARACTER_CELL_INDEX];

  // Rows that do not have a cell for the requested variant are skipped.
  if (!variantCell || !emojiCell) {
    return null;
  }
  // A lone em dash marks a variant that has no image for this emoji.
  if (variantCell.innerHTML === '—') {
    return null;
  }

  const emoji = emojiCell.innerHTML;
  const imageSrc = variantCell.querySelector('img')?.getAttribute('src');
  if (!emoji || !imageSrc) {
    return null;
  }
  return { [emoji]: imageSrc };
}
/**
 * Downloads the page at `UNICODE_FULL_EMOJI_LIST` and returns its body as a UTF-8 string.
 *
 * The response is consumed as a stream so that download progress can be written to
 * stdout while the body arrives.
 *
 * @returns The complete response body, decoded as UTF-8 after all chunks are joined.
 * @throws Rejects when the request itself fails or the response stream errors.
 */
async function fetchUnicodeWebsite(): Promise<string> {
  // Awaiting the request (rather than wrapping it in `new Promise`) ensures that a
  // failed request rejects this function instead of leaving its promise pending.
  const resp = await axios.get(UNICODE_FULL_EMOJI_LIST, {
    responseType: 'stream',
  });

  // The header may be absent; in that case only the downloaded size is reported.
  const total = Number(resp.headers['content-length']);
  const totalKnown = Number.isFinite(total) && total > 0;

  const chunks: Buffer[] = [];
  let fetched = 0;
  // Iterating the stream propagates stream errors as a rejection of this function.
  for await (const chunk of resp.data) {
    const piece = Buffer.from(chunk);
    fetched += piece.length;
    const progress = totalKnown
      ? `${(fetched / total * 100).toFixed(2)}% of ${(total / 1000000).toFixed(2)}MB`
      : `${(fetched / 1000000).toFixed(2)}MB`;
    process.stdout.write(`Fetching emoji table... ${progress}\r`);
    chunks.push(piece);
  }

  // Decode only after concatenation so multi-byte characters split across chunks survive.
  return Buffer.concat(chunks).toString('utf8');
}
/**
 * Downloads the emoji page and builds a map from emoji to image source for one vendor.
 *
 * @param variant - Which vendor column to read images from.
 * @returns An object whose keys are emoji and whose values are the `src` of the
 *   matching image. Rows that yield no pair are skipped; if no row yields a pair the
 *   result is an empty object. When the same emoji appears more than once, the last
 *   row wins.
 * @throws Error if the downloaded page contains no `<table>` element.
 */
async function scrapeEmojis(variant: EmojiVariant): Promise<{ [key: string]: string; }> {
  const html = await fetchUnicodeWebsite();
  const root: HTMLElement = parse(html);
  const table = root.querySelector('table');
  if (!table) {
    throw new Error('broken code!');
  }

  const rowResults = await Promise.all(
    table.querySelectorAll('tr').map((tableRow) => parseTableRow(tableRow, variant)),
  );

  // Merge into a fresh object: unlike `reduce` without an initial value, this does
  // not throw when every row was skipped.
  const emojis: { [key: string]: string; } = {};
  for (const entry of rowResults) {
    if (entry !== null) {
      Object.assign(emojis, entry);
    }
  }
  return emojis;
}
// Entry point: scrape the Twemoji column and write the result to data/emojis.json.
(async () => {
  const emojiTable = await scrapeEmojis(EmojiVariant.TWEMOJI);
  if (!existsSync('./data')) {
    mkdirSync('./data');
  }

  // Escape every UTF-16 code unit from U+007F upward as \uXXXX so the output file
  // contains only ASCII characters.
  const asciiJson = JSON.stringify(emojiTable).replace(
    /[\u007F-\uFFFF]/g,
    (chr) => `\\u${chr.charCodeAt(0).toString(16).padStart(4, '0')}`,
  );

  writeFile('data/emojis.json', asciiJson, {}, (err) => {
    if (err) {
      console.error(err);
      process.exitCode = 1;
    }
  });
})().catch((err: unknown) => {
  // Without this handler a failed scrape would surface only as an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});
crazy shit