Last active
December 27, 2021 14:59
-
-
Save halfnibble/a03d026020c5ca530b7a11d2589735f8 to your computer and use it in GitHub Desktop.
GraphCMS HTML to RichTextAST
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// `jsx` is slate-hyperscript's node factory; it is used to build every node of the AST.
const { jsx } = require('slate-hyperscript')
const { JSDOM } = require('jsdom')
// Node.js has no global DOMParser, so take the constructor from a throwaway jsdom window.
const DOMParser = new JSDOM().window.DOMParser
/**
 * Parses an HTML string and returns a Rich Text AST object for use in GraphCMS.
 *
 * The markup is parsed as a complete `text/html` document and the resulting
 * `<body>` node is handed to `deserialize`.
 *
 * @param {string} html HTML markup, e.g. `<p>hi there</p>`
 * @returns {*} whatever `deserialize` produces for the parsed `<body>`
 */
const htmlToAST = html => {
  const parsed = new DOMParser().parseFromString(html, 'text/html')
  return deserialize(parsed.body)
}
/**
 * Block-level tags, keyed by upper-case DOM `nodeName`.
 *
 * Each entry is a function that receives the DOM element and returns the
 * attribute object (always a `type`, plus `url` for links and images) for
 * the node that represents that tag.
 */
const ELEMENT_TAGS = {
  A: el => ({ type: 'link', url: el.getAttribute('href') }),
  BLOCKQUOTE: () => ({ type: 'quote' }),
  H1: () => ({ type: 'heading-one' }),
  H2: () => ({ type: 'heading-two' }),
  H3: () => ({ type: 'heading-three' }),
  H4: () => ({ type: 'heading-four' }),
  H5: () => ({ type: 'heading-five' }),
  H6: () => ({ type: 'heading-six' }),
  IMG: el => ({ type: 'image', url: el.getAttribute('src') }),
  LI: () => ({ type: 'list-item' }),
  OL: () => ({ type: 'numbered-list' }),
  P: () => ({ type: 'paragraph' }),
  PRE: () => ({ type: 'code' }),
  UL: () => ({ type: 'bulleted-list' }),
}
/**
 * Inline formatting tags, keyed by upper-case DOM `nodeName`.
 *
 * Each entry is a function that returns the mark (a boolean flag object) to
 * put on the text produced from inside that tag.
 */
// COMPAT: `B` is omitted here because Google Docs uses `<b>` in weird ways.
const TEXT_TAGS = {
  CODE: () => ({ code: true }),
  DEL: () => ({ strikethrough: true }),
  EM: () => ({ italic: true }),
  I: () => ({ italic: true }),
  S: () => ({ strikethrough: true }),
  STRONG: () => ({ bold: true }),
  U: () => ({ underline: true }),
}
/**
 * Recursively converts a DOM node into AST content built with `jsx`.
 *
 * - Text nodes yield their text content; `<br>` yields a newline.
 * - Other non-element nodes (e.g. comments) yield `null` and are dropped
 *   from their parent's children.
 * - `<body>` yields a `jsx` fragment of its converted children.
 * - Tags listed in `ELEMENT_TAGS` yield a `jsx` element.
 * - Tags listed in `TEXT_TAGS` yield their children with the tag's mark
 *   applied to every text child.
 * - Any other tag is transparent: its converted children are returned as-is.
 *
 * @param {Node} el DOM node to convert
 * @returns {*} a string, `null`, a single `jsx` result, or an array of
 *   converted children, depending on the node (see above)
 */
const deserialize = el => {
  // DOM nodeType values: 3 is a text node, 1 is an element.
  if (el.nodeType === 3) {
    return el.textContent
  }
  if (el.nodeType !== 1) {
    return null
  }

  const { nodeName } = el
  if (nodeName === 'BR') {
    return '\n'
  }

  // For `<pre><code>…</code></pre>` read the children of the inner <code>,
  // so the content is not additionally marked as inline code.
  const firstChild = el.childNodes[0]
  const parent =
    nodeName === 'PRE' && firstChild && firstChild.nodeName === 'CODE'
      ? firstChild
      : el

  // Drop the nulls produced by comments and other ignored nodes; otherwise a
  // mark tag below would turn each of them into an empty marked text leaf.
  const children = Array.from(parent.childNodes)
    .map(deserialize)
    .flat()
    .filter(child => child != null)

  if (nodeName === 'BODY') {
    return jsx('fragment', {}, children)
  }

  if (ELEMENT_TAGS[nodeName]) {
    const attrs = ELEMENT_TAGS[nodeName](el)
    // An element must contain at least one text leaf, so give childless
    // elements (e.g. `<p></p>`, `<img>`) a single empty one.
    const content = children.length > 0 ? children : [{ text: '' }]
    return jsx('element', attrs, content)
  }

  if (TEXT_TAGS[nodeName]) {
    const attrs = TEXT_TAGS[nodeName](el)
    // The `text` tag only accepts text content. Element children (e.g. a link
    // inside <strong>) are passed through unchanged instead of making `jsx` throw.
    return children.map(child =>
      typeof child === 'string' || typeof child.text === 'string'
        ? jsx('text', attrs, child)
        : child
    )
  }

  return children
}
// The module's only export is the HTML-to-AST converter.
module.exports = htmlToAST
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thank you, I needed this, I made a TypeScript version here: https://gist.github.com/bouiboui/7e4a4561e31ab6a5700225ff0122858f