Last active
April 24, 2026 03:00
-
-
Save rndme/0b21f08823a0ff0e780a4542dd5be8bf to your computer and use it in GitHub Desktop.
given a string of copied HTML code bloated with inline styles, outputs clean HTML and HIGHLY optimized CSS
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // [ccby@dandavis] pure html+css from inline-style-ladden selected+copy'd HTML code | |
| // returns clean html with embedded scoped CSS <style> tag | |
| function deriveCSS(swollenHTML) { | |
| // main logic: | |
| var rez = extractAndOptimizeStyles(swollenHTML); | |
| var rules = parseCSS(rez.css); | |
| // re-embed vars, but don't remove anything | |
| reEmbedVars(rules); | |
| // de-redundant classes | |
| stripRedundantClasses(rules); | |
| // another optimization: combine selectors rule coverage overlap, defines th,td { box-sizing: inherit; font-size:18px; ...} | |
| makeCombinedSelectorsIfNeeded(rules); | |
| // remove single-use vars | |
| removeSingleVars(rules); | |
| // one last empty rule pass after further considation: | |
| deleteEmptyRules(rules); | |
| // generate output CSS string: | |
| rez.css = rules2css(rules); | |
| // make rule object, html, and css available: | |
| deriveCSS.lastResult = {rules, ...rez}; | |
| var lastCloseTag = rez.html.lastIndexOf("<\/"), | |
| ender = rez.html.slice(lastCloseTag), | |
| rootTag = ender.slice(2).split(">")[0]; | |
| return deriveCSS.lastResult.output = rez.html.slice(0, lastCloseTag) + | |
| `\n<style>@scope{\n${rez.css.replace(rootTag, "&")}}</style>\n` + | |
| ender; | |
| ////////////////////////////////// | |
| // function bank: | |
/**
 * Collects every inline `style` attribute found in an HTML string, converts
 * the declarations into a compact stylesheet, and strips the attributes.
 *
 * Steps: (1) group elements by base selector (class chain, else tag name) and
 * by identical style text; (2) emit one rule per group — the most common
 * variation keeps the base selector, rarer ones get a structural selector;
 * (3) drop declarations a more specific rule merely repeats; (4) substitute
 * custom-property values with `var(--name)`; (5) serialize; (6) remove the
 * `style` attributes.
 *
 * Requires a DOM environment (`DOMParser`).
 *
 * NOTE(review): class names are used in selectors verbatim (no CSS escaping),
 * and declarations are split on ";" and ":" textually, so a value containing
 * ";" (e.g. a data: URL) is truncated.
 *
 * @param {string} htmlString - HTML fragment with inline styles.
 * @returns {{css: string, html: string}} Generated CSS text and cleaned HTML.
 */
function extractAndOptimizeStyles(htmlString) { // gemini 3.1 rewrote this function a lot, but the rest is pure dandavis.
  const parser = new DOMParser();
  const doc = parser.parseFromString(`<body>${htmlString}</body>`, 'text/html');
  const root = doc.body;
  const styledElements = Array.from(root.querySelectorAll('[style]'));

  // A Map (not a plain object) so that no selector can collide with a key
  // inherited from Object.prototype.
  const stylesMap = new Map();

  const escapeRegExp = (text) => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // Helper to parse "prop: value; ..." text into an object. Named differently
  // from the sibling stylesheet parser so it does not shadow it.
  const parseDeclarations = (str) => {
    const props = {};
    for (const decl of str.split(';')) {
      const parts = decl.split(':');
      if (parts.length < 2) continue;
      const key = parts.shift().trim();
      props[key] = parts.join(':').trim();
    }
    return props;
  };

  // 1. Collect and Group Styles
  for (const el of styledElements) {
    const cssText = el.style.cssText;
    if (!cssText) continue;

    // Decide on classList, not className: className is an always-truthy
    // object on SVG elements and a truthy string for a whitespace-only class
    // attribute; both used to produce the bare selector ".".
    const classNames = Array.from(el.classList);
    const baseSelector = classNames.length > 0
      ? `.${classNames.join('.')}`
      : el.tagName.toLowerCase();

    if (!stylesMap.has(baseSelector)) stylesMap.set(baseSelector, new Map());
    const variationsForSelector = stylesMap.get(baseSelector);
    if (!variationsForSelector.has(cssText)) variationsForSelector.set(cssText, []);
    variationsForSelector.get(cssText).push(el);
  }

  // 2. Generate Base Rules
  const rawRules = [];
  for (const [baseSelector, variationsMap] of stylesMap) {
    // Most frequently used style text first; it keeps the plain base selector.
    const variations = Array.from(variationsMap.entries())
      .sort((a, b) => b[1].length - a[1].length);
    variations.forEach(([cssText, els], index) => {
      rawRules.push({
        selector: index > 0 ? getStructuralSelector(els, baseSelector, root) : baseSelector,
        props: parseDeclarations(cssText),
      });
    });
  }

  // 3. OPTIMIZATION A: Rule Deduplication (Remove overlapping subset rules)
  const getClasses = (sel) => sel.match(/\.[a-zA-Z0-9_-]+/g) || [];
  const isMoreSpecificMatch = (base, specific) => {
    if (base === specific) return false;

    // Structural extensions (e.g., ".red" inside ".red:first-child" or
    // "tr td" inside "tr:last-child td")
    const tokenRegex = new RegExp(`(?:^|[\\s>])${escapeRegExp(base)}(?:[:\\s>\\.]|$)`);
    if (tokenRegex.test(specific)) return true;

    // Multi-class combinations (e.g., ".red" inside ".big.red")
    const baseClasses = getClasses(base);
    const specificClasses = getClasses(specific);
    if (baseClasses.length > 0 && baseClasses.length < specificClasses.length) {
      return baseClasses.every((c) => specificClasses.includes(c));
    }
    return false;
  };

  for (const ruleA of rawRules) { // Potential parent
    for (const ruleB of rawRules) { // Potential child
      if (ruleA === ruleB) continue;
      if (!isMoreSpecificMatch(ruleA.selector, ruleB.selector)) continue;
      // B is more specific: remove any properties that identically match A
      for (const [key, value] of Object.entries(ruleA.props)) {
        if (ruleB.props[key] === value) delete ruleB.props[key];
      }
    }
  }

  // 4. OPTIMIZATION B: Variable Integration
  for (const rule of rawRules) {
    // Custom properties of this rule, longest value first so that a longer
    // value is matched before a shorter one contained in it. Empty values are
    // skipped: an empty pattern would match at arbitrary boundaries.
    const vars = Object.entries(rule.props)
      .filter(([key, val]) => key.startsWith('--') && val !== '')
      .map(([name, val]) => ({ name, val }))
      .sort((a, b) => b.val.length - a.val.length);

    // Substitute variable values in standard properties
    for (const key of Object.keys(rule.props)) {
      if (key.startsWith('--')) continue;
      let currentVal = rule.props[key];
      for (const v of vars) {
        if (!currentVal.includes(v.val)) continue;
        // Lookarounds ensure only whole values are replaced, not fragments
        const boundaryRegex = new RegExp(`(?<=^|[\\s,()])${escapeRegExp(v.val)}(?=[\\s,()]|$)`, 'g');
        currentVal = currentVal.replace(boundaryRegex, `var(${v.name})`);
      }
      rule.props[key] = currentVal;
    }
  }

  // 5. Construct Final CSS String
  let cssOutput = '';
  for (const rule of rawRules) {
    const keys = Object.keys(rule.props);
    if (keys.length === 0) continue; // Skip completely redundant rules
    const declarations = keys.map((k) => `${k}: ${rule.props[k]};`).join(' ');
    cssOutput += `${rule.selector} { ${declarations} }\n`;
  }

  // 6. Clean HTML
  styledElements.forEach((el) => el.removeAttribute('style'));

  return {
    css: cssOutput.trim() + '\n',
    html: root.innerHTML
  };

  // --- Helper function for structural pseudo-classes ---
  // Builds a selector for a less common style variation of `base`, trying in
  // order: :first-child, :last-child, a parent-position prefix, and finally a
  // comma-separated list of per-element :nth-child selectors.
  // NOTE(review): only the given elements are inspected; nothing checks that
  // the resulting selector does not also match other elements.
  function getStructuralSelector(els, base, boundary) {
    if (els.every((el) => el.previousElementSibling === null)) return `${base}:first-child`;
    if (els.every((el) => el.nextElementSibling === null)) return `${base}:last-child`;

    const parents = [...new Set(els.map((el) => el.parentElement))].filter(Boolean);
    const firstParent = parents[0];
    // The parent-based selector names a single tag, so it is only valid when
    // every parent has that tag.
    const parentsShareTag = parents.every((p) => p.tagName === firstParent.tagName);

    if (parents.length > 0 && firstParent !== boundary && parentsShareTag) {
      const parentTag = firstParent.tagName.toLowerCase();
      if (parents.every((p) => p.previousElementSibling === null)) {
        return `${parentTag}:first-child ${base}`;
      }
      if (parents.every((p) => p.nextElementSibling === null)) {
        return `${parentTag}:last-child ${base}`;
      }
      if (parents.length === 1 && firstParent.parentElement) {
        const idx = Array.from(firstParent.parentElement.children).indexOf(firstParent) + 1;
        return `${parentTag}:nth-child(${idx}) ${base}`;
      }
    }

    return els.map((el) => {
      if (!el.parentElement) return base;
      const idx = Array.from(el.parentElement.children).indexOf(el) + 1;
      return `${base}:nth-child(${idx})`;
    }).join(', ');
  }
} // end extractAndOptimizeStyles()
/**
 * Parses flat CSS text into `{ selector: { property: value } }`.
 *
 * Comments are removed first. The text is then split on "}" into rules, each
 * rule on the first "{" into selector and body, and the body on ";" into
 * declarations. Rules repeating a selector are merged (later declarations
 * win) instead of replacing the earlier rule. Chunks without a selector and
 * declarations without a property name or a ":" are ignored.
 *
 * NOTE: the splitting is purely textual, so nested blocks (e.g. @media) and
 * values containing ";" or "}" are not supported.
 *
 * @param {string} css - Stylesheet text.
 * @returns {Object<string, Object<string, string>>} Rules keyed by selector.
 */
function parseCSS(css) {
  const out = {};
  const withoutComments = css.replace(/\/\*[\w\W]*?\*\//g, "");

  for (const chunk of withoutComments.split("}")) {
    const [rawSelector, rawBody = ""] = chunk.split("{");
    const selector = rawSelector.trim();
    // Whitespace after the final "}" would otherwise become a "" rule.
    if (selector === "") continue;

    if (!Object.prototype.hasOwnProperty.call(out, selector)) out[selector] = {};
    const declarations = out[selector];

    for (const declaration of rawBody.split(";")) {
      const colonAt = declaration.indexOf(":");
      if (colonAt === -1) continue;
      const property = declaration.slice(0, colonAt).trim();
      if (property === "") continue;
      // Everything after the first ":" is the value (keeps "url(http://...)").
      declarations[property] = declaration.slice(colonAt + 1).trim();
    }
  }
  return out;
} // end parseCSS()
/**
 * Serializes a rules object (`{ selector: { property: value } }`) to CSS text.
 *
 * Each rule is written as `selector {`, one tab-indented `property: value;`
 * per line, then `}`. Rules without declarations are skipped rather than
 * written as a block containing a lone ";".
 *
 * @param {Object<string, Object<string, string>>} rules - Rules keyed by selector.
 * @returns {string} CSS text without leading or trailing whitespace.
 */
function rules2css(rules) {
  return Object.entries(rules)
    .filter(([, declarations]) => Object.keys(declarations).length > 0)
    .map(([selector, declarations]) => {
      const body = Object.entries(declarations)
        .map(([prop, val]) => `${prop}: ${val}`)
        .join(";\n\t");
      return `${selector} {\n\t${body};\n}`;
    })
    .join("\n")
    .trim();
}
/**
 * Removes, in place, every rule that has no declarations left.
 *
 * @param {Object<string, Object<string, string>>} rules - Rules keyed by selector.
 * @returns {void}
 */
function deleteEmptyRules(rules) {
  for (const selector of Object.keys(rules)) {
    const declarationCount = Object.keys(rules[selector]).length;
    if (declarationCount === 0) {
      delete rules[selector];
    }
  }
}
/**
 * Moves declarations shared by several rules into combined-selector rules,
 * mutating `objRules` in place.
 *
 * Every `property: value` pair that occurs in at least two rules is removed
 * from those rules and added to a rule whose selector is the comma-joined
 * list of the selectors that carried it (e.g. `th, td`). Pairs shared by the
 * same set of selectors end up in the same combined rule. Rules left empty
 * are deleted. If a rule with the combined selector already exists, the
 * shared declarations are merged into it.
 *
 * Pairs are tallied directly from the rule objects (no `eval` of serialized
 * text) and tracked per pair, so two different shared values of the same
 * property do not overwrite each other's bookkeeping.
 *
 * @param {Object<string, Object<string, string>>} objRules - Rules keyed by selector.
 * @returns {void}
 */
function makeCombinedSelectorsIfNeeded(objRules) {
  // Tally which selectors carry each exact property/value pair.
  const groups = new Map();
  for (const [selector, declarations] of Object.entries(objRules)) {
    for (const [prop, val] of Object.entries(declarations)) {
      const pairKey = `${JSON.stringify(prop)}: ${JSON.stringify(val)}`;
      let group = groups.get(pairKey);
      if (!group) {
        group = { prop, val, selectors: [] };
        groups.set(pairKey, group);
      }
      group.selectors.push(selector);
    }
  }

  // keep only pairs that occur at least twice, in a stable (sorted) order:
  const sharedPairs = [...groups.entries()]
    .filter(([, group]) => group.selectors.length >= 2)
    .sort(([keyA], [keyB]) => (keyA < keyB ? -1 : keyA > keyB ? 1 : 0))
    .map(([, group]) => group);

  // gather the pairs into pools keyed by combined selector, and remove them
  // from the rules they came from:
  const pools = new Map();
  for (const { prop, val, selectors } of sharedPairs) {
    const poolKey = selectors.join(", ");
    if (!pools.has(poolKey)) pools.set(poolKey, []);
    pools.get(poolKey).push([prop, val]);
    for (const selector of selectors) {
      delete objRules[selector][prop];
    }
  }

  deleteEmptyRules(objRules);

  // inject the combined rules, merging into a rule that already has the
  // same selector instead of replacing it:
  for (const [poolKey, pairs] of pools) {
    if (!Object.prototype.hasOwnProperty.call(objRules, poolKey)) objRules[poolKey] = {};
    const combined = objRules[poolKey];
    for (const [prop, val] of pairs) {
      combined[prop] = val;
    }
  }
} // end makeCombinedSelectorsIfNeeded()
/**
 * Within each rule, replaces occurrences of a custom property's value in the
 * rule's other declarations with `var(--name)`, mutating `rules` in place.
 * No declaration is removed.
 *
 * A value is only replaced where it stands as a whole token — at the start or
 * end of the declaration value or delimited by whitespace, a comma or a
 * parenthesis — so `--w: 1px` does not rewrite `11px` into `1var(--w)`.
 * Custom properties with an empty value are skipped. Other custom properties
 * of the same rule are also candidates for substitution.
 *
 * @param {Object<string, Object<string, string>>} rules - Rules keyed by selector.
 * @returns {void}
 */
function reEmbedVars(rules) {
  const escapeRegExp = (text) => text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

  for (const declarations of Object.values(rules)) {
    const names = Object.keys(declarations);
    const varNames = names.filter((name) => name.startsWith("--"));

    for (const varName of varNames) {
      // Read at this point on purpose: an earlier substitution may already
      // have rewritten this variable's own value.
      const varValue = declarations[varName];
      if (typeof varValue !== "string" || varValue === "") continue;

      const wholeValue = new RegExp(
        `(?<=^|[\\s,()])${escapeRegExp(varValue)}(?=[\\s,()]|$)`,
        "g"
      );

      for (const otherName of names) {
        if (otherName === varName) continue;
        const current = declarations[otherName];
        if (typeof current !== "string" || !current.includes(varValue)) continue;
        // Function replacer: "$" in the name is never treated as a pattern.
        declarations[otherName] = current.replace(wholeValue, () => `var(${varName})`);
      }
    }
  }
} // end reEmbedVars()
/**
 * For selectors containing at least two dots (e.g. `.big.red`), removes every
 * declaration that a single-class rule for one of its parts (`.big`, `.red`)
 * already defines with the identical value. Mutates `rules` in place.
 *
 * The parts are obtained by splitting the selector on "." (surrounding
 * whitespace ignored); each part is looked up as `"." + part`. Class names
 * are taken as-is — a leading "x" is part of the name and is not stripped.
 *
 * @param {Object<string, Object<string, string>>} rules - Rules keyed by selector.
 * @returns {void}
 */
function stripRedundantClasses(rules) {
  for (const [selector, declarations] of Object.entries(rules)) {
    // Fewer than two dots: nothing to compare against.
    if (String(selector).split(".").length <= 2) continue;

    const parts = selector.split(/\s*\.\s*/).filter(String);
    for (const part of parts) {
      const base = rules[`.${part}`];
      if (!base || base === declarations) continue;

      // We have a base class: delete any declaration of the current rule
      // that the base rule defines identically.
      for (const prop of Object.keys(declarations)) {
        if (base[prop] === declarations[prop]) delete declarations[prop];
      }
    }
  }
} //end stripRedundantClasses()
/**
 * Deletes custom properties (`--name`) that no declaration value in any rule
 * references through `var(--name ...)`. Mutates `rules` in place.
 *
 * References are matched by exact name, so `var(--a, fallback)` counts as a
 * use of `--a`, and `--a` is not confused with `--ab`. Removal repeats until
 * nothing changes: once an unused property is gone, properties referenced
 * only from its value become unused as well and are removed too.
 *
 * @param {Object<string, Object<string, string>>} rules - Rules keyed by selector.
 * @returns {void}
 */
function removeSingleVars(rules) {
  // Captures the custom-property name that follows "var(".
  const varReference = /var\(\s*(--[^\s,)]+)/g;

  let removedAny = true;
  while (removedAny) {
    removedAny = false;

    // Collect every name referenced anywhere in the current rules.
    const referenced = new Set();
    for (const declarations of Object.values(rules)) {
      for (const value of Object.values(declarations)) {
        for (const match of String(value).matchAll(varReference)) {
          referenced.add(match[1]);
        }
      }
    }

    // Drop the definitions nobody references.
    for (const declarations of Object.values(rules)) {
      for (const prop of Object.keys(declarations)) {
        if (prop.startsWith("--") && !referenced.has(prop)) {
          delete declarations[prop];
          removedAny = true;
        }
      }
    }
  }
} // end removeSingleVars()
| } //end deriveCSS() | |
| //example usage: myDiv.innerHTML = deriveCSS( strCrappyHTML ); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment