-
-
Save bouiboui/7e4a4561e31ab6a5700225ff0122858f to your computer and use it in GitHub Desktop.
GraphCMS HTML to RichTextAST (TypeScript version)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import {jsx} from "slate-hyperscript"; | |
import {JSDOM} from "jsdom"; | |
/**
 * `DOMParser` constructor taken from a fresh jsdom window, so this module
 * does not rely on a global `DOMParser` being available.
 */
const DOMParser = new JSDOM().window.DOMParser
/**
 * Converts an HTML string into a rich-text AST.
 *
 * The markup is parsed as a `text/html` document and the resulting `<body>`
 * element is handed to `deserialize`.
 *
 * @param html - HTML markup to convert.
 * @returns Whatever `deserialize` produces for the parsed document body.
 */
const htmlToAST = (html: string): ReturnType<typeof deserialize> => {
  const parsed = new DOMParser().parseFromString(html, 'text/html')
  return deserialize(parsed.body)
}

export default htmlToAST
/**
 * Maps an upper-case HTML tag name to a function that builds the attributes
 * of the element node created for that tag.
 *
 * Every handler yields a `type`. `A` and `IMG` additionally copy the element's
 * `href` / `src` attribute into `url`; `getAttribute` returns `null` when the
 * attribute is absent, which is why the value type admits `null`.
 */
const ELEMENT_TAGS: Record<string, (el: HTMLElement) => Record<string, string | null>> = {
  A: (el) => ({type: 'link', url: el.getAttribute('href')}),
  BLOCKQUOTE: () => ({type: 'quote'}),
  H1: () => ({type: 'heading-one'}),
  H2: () => ({type: 'heading-two'}),
  H3: () => ({type: 'heading-three'}),
  H4: () => ({type: 'heading-four'}),
  H5: () => ({type: 'heading-five'}),
  H6: () => ({type: 'heading-six'}),
  IMG: (el) => ({type: 'image', url: el.getAttribute('src')}),
  LI: () => ({type: 'list-item'}),
  OL: () => ({type: 'numbered-list'}),
  P: () => ({type: 'paragraph'}),
  PRE: () => ({type: 'code'}),
  UL: () => ({type: 'bulleted-list'}),
}
/**
 * Maps an upper-case inline HTML tag name to a function that returns the
 * formatting flags to apply to the text nodes produced inside that tag.
 *
 * COMPAT: `B` is omitted here because Google Docs uses `<b>` in weird ways.
 */
const TEXT_TAGS: Record<string, (el: HTMLElement) => Record<string, boolean>> = {
  CODE: () => ({code: true}),
  DEL: () => ({strikethrough: true}),
  EM: () => ({italic: true}),
  I: () => ({italic: true}),
  S: () => ({strikethrough: true}),
  STRONG: () => ({bold: true}),
  U: () => ({underline: true}),
}
/**
 * Anything `deserialize` can return: raw text, `null` for ignored nodes,
 * a value built by `jsx`, or a (possibly nested) list of those.
 */
type DeserializedNode = string | null | ReturnType<typeof jsx> | DeserializedNode[]

/**
 * Returns the table's own entry for `name`, ignoring members inherited from
 * `Object.prototype` (e.g. `constructor`), or `undefined` when there is none.
 */
const lookupTag = <T>(table: Record<string, T>, name: string): T | undefined =>
  Object.prototype.hasOwnProperty.call(table, name) ? table[name] : undefined

/**
 * Recursively converts a DOM node into rich-text AST pieces.
 *
 * - Text nodes yield their text content.
 * - Nodes that are neither text nor elements yield `null`.
 * - `<br>` yields a newline string.
 * - `<body>` yields a `jsx('fragment', …)` wrapping all converted children.
 * - Tags listed in `ELEMENT_TAGS` yield a `jsx('element', …)` with the
 *   attributes from the table and the converted children.
 * - Tags listed in `TEXT_TAGS` yield one `jsx('text', …)` per converted child,
 *   each carrying the flags from the table.
 * - Any other element is transparent: its converted children are returned
 *   as a plain array and spliced into the parent's children.
 *
 * @param el - DOM node to convert.
 * @returns The converted value for `el` (see the cases above).
 */
const deserialize: (el: Node) => DeserializedNode = (el) => {
  // nodeType 3 is a text node.
  if (el.nodeType === 3) {
    return el.textContent
  }
  // nodeType 1 is an element; everything else is dropped.
  if (el.nodeType !== 1) {
    return null
  }
  if (el.nodeName === 'BR') {
    return '\n'
  }

  // The nodeType check above guarantees an element; the tag tables expect
  // the `HTMLElement` API (`getAttribute`).
  const element = el as HTMLElement
  const {nodeName} = element

  // For <pre><code>…</code></pre>, skip the inner <code> wrapper and convert
  // its children directly as the children of the <pre> element.
  const firstChild = element.firstChild
  const parent: Node =
    nodeName === 'PRE' && firstChild !== null && firstChild.nodeName === 'CODE'
      ? firstChild
      : element

  // flatMap splices array results (from transparent or text-formatting tags)
  // into this node's child list, one level deep.
  const children: DeserializedNode[] = Array.from(parent.childNodes).flatMap(
    (child) => deserialize(child)
  )

  if (nodeName === 'BODY') {
    return jsx('fragment', {}, children)
  }

  const toElementAttrs = lookupTag(ELEMENT_TAGS, nodeName)
  if (toElementAttrs) {
    return jsx('element', toElementAttrs(element), children)
  }

  const toMarks = lookupTag(TEXT_TAGS, nodeName)
  if (toMarks) {
    const marks = toMarks(element)
    return children.map((child) => jsx('text', marks, child))
  }

  return children
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment