Skip to content

Instantly share code, notes, and snippets.

@bouiboui
Forked from halfnibble/htmlToAST.js
Created March 19, 2021 09:16
Show Gist options
  • Save bouiboui/7e4a4561e31ab6a5700225ff0122858f to your computer and use it in GitHub Desktop.
Save bouiboui/7e4a4561e31ab6a5700225ff0122858f to your computer and use it in GitHub Desktop.
GraphCMS HTML to RichTextAST (TypeScript version)
import {jsx} from "slate-hyperscript";
import {JSDOM} from "jsdom";
// jsdom's window supplies the DOMParser implementation used to parse incoming HTML.
const {DOMParser} = new JSDOM().window

/**
 * Converts an HTML string into the structure produced by `deserialize`.
 *
 * The markup is parsed as a complete `text/html` document and conversion
 * starts from that document's `<body>`.
 *
 * @param html - HTML markup to convert.
 * @returns Whatever `deserialize` yields for the parsed document body.
 */
const htmlToAST = (html: string) =>
  deserialize(new DOMParser().parseFromString(html, 'text/html').body)

export default htmlToAST
/** Attribute bag attached to an element node. */
type ElementAttrs = Record<string, string | null>

/** Builds a factory for elements that carry nothing but a `type`. */
const plainElement = (type: string) => (): ElementAttrs => ({type})

/** Builds a factory for elements whose `url` is read from the given HTML attribute. */
const urlElement = (type: string, attribute: string) =>
  (el: HTMLElement): ElementAttrs => ({type, url: el.getAttribute(attribute)})

/**
 * Maps an upper-case HTML tag name to a factory producing the attributes of
 * the corresponding element node. Each call returns a fresh object.
 */
const ELEMENT_TAGS: Record<string, (el: HTMLElement) => ElementAttrs> = {
  A: urlElement('link', 'href'),
  BLOCKQUOTE: plainElement('quote'),
  H1: plainElement('heading-one'),
  H2: plainElement('heading-two'),
  H3: plainElement('heading-three'),
  H4: plainElement('heading-four'),
  H5: plainElement('heading-five'),
  H6: plainElement('heading-six'),
  IMG: urlElement('image', 'src'),
  LI: plainElement('list-item'),
  OL: plainElement('numbered-list'),
  P: plainElement('paragraph'),
  PRE: plainElement('code'),
  UL: plainElement('bulleted-list'),
}
/** Builds a factory returning a fresh `{[name]: true}` mark object on every call. */
const markOf = (name: string) => (): Record<string, boolean> => ({[name]: true})

/**
 * Maps an upper-case inline HTML tag name to a factory producing the text
 * mark it stands for.
 *
 * COMPAT: `B` is deliberately left out because Google Docs uses `<b>` in
 * weird ways.
 */
const TEXT_TAGS: Record<string, (el: HTMLElement) => Record<string, boolean>> = {
  CODE: markOf('code'),
  DEL: markOf('strikethrough'),
  EM: markOf('italic'),
  I: markOf('italic'),
  S: markOf('strikethrough'),
  STRONG: markOf('bold'),
  U: markOf('underline'),
}
/** Structural stand-in for a node built by `jsx`; this file imports no Slate types to name it more precisely. */
type SlateNode = Record<string, unknown>

/** One converted DOM node: raw text, a built node, or `null` for a node that is dropped. */
type DeserializedItem = string | SlateNode | null

/** Result of converting one DOM node: a single item or a flat list of items. */
type Deserialized = DeserializedItem | DeserializedItem[]

/**
 * Recursively converts a DOM node into nodes built with `jsx`.
 *
 * - A text node yields its text content.
 * - Any other non-element node yields `null`.
 * - `<br>` yields `'\n'`.
 * - `<body>` yields a `fragment` holding its converted children.
 * - A tag listed in `ELEMENT_TAGS` yields an `element` with that tag's attributes.
 * - A tag listed in `TEXT_TAGS` yields one `text` node per converted child,
 *   each carrying that tag's mark.
 * - Any other tag is transparent: its converted children are returned as a list.
 *
 * @param el - DOM node to convert.
 * @returns The converted item, or a flat list of converted items.
 */
const deserialize: (el: Node) => Deserialized = (el) => {
  // nodeType 3 is a text node.
  if (el.nodeType === 3) {
    return el.textContent
  }
  // Everything that is not an element (nodeType 1) is dropped.
  if (el.nodeType !== 1) {
    return null
  }

  // The nodeType check above established that this node is an element.
  const element = el as HTMLElement
  const {nodeName} = element
  if (nodeName === 'BR') {
    return '\n'
  }

  // For <pre><code>…</code></pre> read the children of the inner <code>, so the
  // block becomes a single `code` element rather than code-marked text inside one.
  let parent: Node = element
  const firstChild = element.childNodes[0]
  if (nodeName === 'PRE' && firstChild && firstChild.nodeName === 'CODE') {
    parent = firstChild
  }

  // Transparent and marked tags return lists, so flatten one level per recursion step.
  const children: DeserializedItem[] = Array.from(parent.childNodes)
    .map(deserialize)
    .flat()

  if (nodeName === 'BODY') {
    return jsx('fragment', {}, children)
  }

  const toElementAttrs = ELEMENT_TAGS[nodeName]
  if (toElementAttrs) {
    // Slate requires every element to contain at least one text descendant, so
    // an element without content (e.g. <img> or an empty <p>) gets an empty text child.
    const hasContent = children.some((child) => child != null)
    const elementChildren = hasContent ? children : [{text: ''}]
    return jsx('element', toElementAttrs(element), elementChildren)
  }

  const toMarkAttrs = TEXT_TAGS[nodeName]
  if (toMarkAttrs) {
    const attrs = toMarkAttrs(element)
    // NOTE(review): when a mark tag wraps a non-text node (e.g. <strong><a>…</a></strong>)
    // that node is handed to jsx('text', …) — confirm slate-hyperscript accepts this.
    return children.map((child) => jsx('text', attrs, child))
  }

  return children
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment