Skip to content

Instantly share code, notes, and snippets.

@rndme
Last active April 24, 2026 03:00
Show Gist options
  • Select an option

  • Save rndme/0b21f08823a0ff0e780a4542dd5be8bf to your computer and use it in GitHub Desktop.

Select an option

Save rndme/0b21f08823a0ff0e780a4542dd5be8bf to your computer and use it in GitHub Desktop.
given a string of copied HTML code bloated with inline styles, outputs clean HTML and HIGHLY optimized CSS
// [ccby@dandavis] pure html+css from inline-style-laden selected+copy'd HTML code
// returns clean html with embedded scoped CSS <style> tag
/**
 * Converts HTML whose elements carry inline `style` attributes into the same
 * HTML without those attributes, plus a `<style>@scope{...}</style>` block
 * inserted just before the last closing tag of the markup.
 *
 * Needs a DOM environment: the nested extractAndOptimizeStyles() helper
 * instantiates `DOMParser`.
 *
 * Side effect: the intermediate data of the latest call is stored on
 * `deriveCSS.lastResult` as `{rules, css, html, output}`.
 *
 * @param {string} swollenHTML - HTML source containing inline styles.
 * @returns {string} Cleaned HTML with the generated style block embedded.
 */
function deriveCSS(swollenHTML) {
// main logic:
// rez is {css, html}: the extracted stylesheet text and the HTML with style attributes removed
var rez = extractAndOptimizeStyles(swollenHTML);
// rules maps selector -> {property: value}; every pass below mutates it in place
var rules = parseCSS(rez.css);
// re-embed vars, but don't remove anything
reEmbedVars(rules);
// de-redundant classes
stripRedundantClasses(rules);
// another optimization: combine selectors rule coverage overlap, defines th,td { box-sizing: inherit; font-size:18px; ...}
makeCombinedSelectorsIfNeeded(rules);
// remove single-use vars
removeSingleVars(rules);
// one last empty rule pass after further consideration:
deleteEmptyRules(rules);
// generate output CSS string:
rez.css = rules2css(rules);
// make rule object, html, and css available:
deriveCSS.lastResult = {rules, ...rez};
// Split the HTML at its last closing tag so the <style> block can be placed inside that element.
// NOTE(review): if the HTML contains no "</", lastIndexOf returns -1 and the slices below
// work relative to the final character instead - confirm inputs always end with a closing tag.
var lastCloseTag = rez.html.lastIndexOf("<\/"),
ender = rez.html.slice(lastCloseTag),
rootTag = ender.slice(2).split(">")[0];
// String.prototype.replace with a string pattern swaps only the FIRST occurrence of the
// root tag name in the CSS text for "&" - presumably so that rule targets the scope root.
return deriveCSS.lastResult.output = rez.html.slice(0, lastCloseTag) +
`\n<style>@scope{\n${rez.css.replace(rootTag, "&")}}</style>\n` +
ender;
//////////////////////////////////
// function bank:
// (function declarations below are hoisted, so they are callable from the main logic above)
/**
 * Moves every inline `style` attribute of an HTML fragment into generated CSS rules.
 *
 * Elements are grouped by a base selector (all of their classes, or the tag name
 * when they have none). Within a group, the most frequent declaration text keeps
 * the plain base selector; each rarer variation gets a structural selector
 * (`:first-child`, `:last-child`, `:nth-child(n)`, optionally via the parent).
 * Afterwards declarations duplicated from a less specific rule are dropped and
 * custom-property values are replaced by `var(--name)` references.
 *
 * Requires a DOM environment (`DOMParser`).
 *
 * @param {string} htmlString - HTML fragment whose elements may carry `style` attributes.
 * @returns {{css: string, html: string}} `css` is the generated rule text (ending with a
 *   newline); `html` is the fragment serialized without any `style` attributes.
 */
function extractAndOptimizeStyles(htmlString) { // gemini 3.1 rewrote this function a lot, but the rest is pure dandavis.
  const parser = new DOMParser();
  const doc = parser.parseFromString(`<body>${htmlString}</body>`, 'text/html');
  const root = doc.body;
  const stylesMap = {};
  const styledElements = Array.from(root.querySelectorAll('[style]'));

  // 1. Collect and Group Styles
  styledElements.forEach(el => {
    const cssText = el.style.cssText;
    if (!cssText) return;
    let baseSelector = el.tagName.toLowerCase();
    // Test classList, not className: className is an always-truthy object on SVG
    // elements and a truthy string for class=" ", both of which used to produce
    // the invalid selector ".".
    if (el.classList.length > 0) {
      baseSelector = '.' + Array.from(el.classList).join('.');
    }
    if (!stylesMap[baseSelector]) stylesMap[baseSelector] = new Map();
    if (!stylesMap[baseSelector].has(cssText)) stylesMap[baseSelector].set(cssText, []);
    stylesMap[baseSelector].get(cssText).push(el);
  });

  // Helper to parse declaration text ("a: b; c: d") into an object.
  // Named differently from the sibling parseCSS() helper to avoid shadowing it.
  const parseDeclarations = (str) => {
    const props = {};
    str.split(';').forEach(decl => {
      const parts = decl.split(':');
      if (parts.length >= 2) {
        const key = parts.shift().trim();
        // re-join so values that contain ":" themselves stay intact
        props[key] = parts.join(':').trim();
      }
    });
    return props;
  };

  let rawRules = [];

  // 2. Generate Base Rules
  for (const [baseSelector, variationsMap] of Object.entries(stylesMap)) {
    // most frequently used declaration text first
    const variations = Array.from(variationsMap.entries())
      .sort((a, b) => b[1].length - a[1].length);
    variations.forEach(([cssText, els], index) => {
      let finalSelector = baseSelector;
      if (index > 0) {
        finalSelector = getStructuralSelector(els, baseSelector, root);
      }
      rawRules.push({
        selector: finalSelector,
        props: parseDeclarations(cssText)
      });
    });
  }

  // 3. OPTIMIZATION A: Rule Deduplication (Remove overlapping subset rules)
  const isMoreSpecificMatch = (base, specific) => {
    if (base === specific) return false;
    // Check for structural extensions (e.g., ".red" inside ".red:first-child" or "tr td" inside "tr:last-child td")
    const escapedBase = base.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const tokenRegex = new RegExp(`(?:^|[\\s>])${escapedBase}(?:[:\\s>\\.]|$)`);
    if (tokenRegex.test(specific)) return true;
    // Check for multi-class combinations (e.g., ".red" inside ".big.red")
    const getClasses = (sel) => sel.match(/\.[a-zA-Z0-9_-]+/g) || [];
    const baseClasses = getClasses(base);
    const specificClasses = getClasses(specific);
    if (baseClasses.length > 0 && baseClasses.length < specificClasses.length) {
      return baseClasses.every(c => specificClasses.includes(c));
    }
    return false;
  };

  for (let i = 0; i < rawRules.length; i++) {
    for (let j = 0; j < rawRules.length; j++) {
      if (i === j) continue;
      const ruleA = rawRules[i]; // Potential parent
      const ruleB = rawRules[j]; // Potential child
      if (isMoreSpecificMatch(ruleA.selector, ruleB.selector)) {
        // If B is more specific, remove any properties that identically match A
        for (const [key, value] of Object.entries(ruleA.props)) {
          if (ruleB.props[key] === value) {
            delete ruleB.props[key];
          }
        }
      }
    }
  }

  // 4. OPTIMIZATION B: Variable Integration
  for (const rule of rawRules) {
    const vars = [];
    // Collect all CSS variables in this rule. Empty values are skipped: an empty
    // pattern would match between any two separators and inject var() references
    // into unrelated values such as "rgb(0, 0, 0)".
    for (const [key, val] of Object.entries(rule.props)) {
      if (key.startsWith('--') && val !== '') vars.push({
        name: key,
        val: val
      });
    }
    // Sort by length descending to match longest values first (prevents partial word matching issues)
    vars.sort((a, b) => b.val.length - a.val.length);
    // Substitute variable values in standard properties
    for (const key of Object.keys(rule.props)) {
      if (key.startsWith('--')) continue;
      let currentVal = rule.props[key];
      for (const v of vars) {
        if (currentVal.includes(v.val)) {
          // Use regex with lookarounds to ensure we only replace exact word/value boundaries
          const escapedVal = v.val.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
          const boundaryRegex = new RegExp(`(?<=^|[\\s,()])${escapedVal}(?=[\\s,()]|$)`, 'g');
          currentVal = currentVal.replace(boundaryRegex, `var(${v.name})`);
        }
      }
      rule.props[key] = currentVal;
    }
  }

  // 5. Construct Final CSS String
  let cssOutput = '';
  for (const rule of rawRules) {
    const keys = Object.keys(rule.props);
    if (keys.length === 0) continue; // Skip completely redundant rules
    const declarations = keys.map(k => `${k}: ${rule.props[k]};`).join(' ');
    cssOutput += `${rule.selector} { ${declarations} }\n`;
  }

  // 6. Clean HTML
  styledElements.forEach(el => el.removeAttribute('style'));
  return {
    css: cssOutput.trim() + '\n',
    html: root.innerHTML
  };

  // --- Helper function for structural pseudo-classes ---
  // (function declaration: hoisted, so it is usable from step 2 above)
  // Picks a selector for a rarer style variation, trying in order:
  // all elements first/last child, all parents first/last child, a single
  // parent's nth-child position, and finally one nth-child selector per element.
  function getStructuralSelector(els, base, boundary) {
    const allFirst = els.every(el => el.previousElementSibling === null);
    if (allFirst) return `${base}:first-child`;
    const allLast = els.every(el => el.nextElementSibling === null);
    if (allLast) return `${base}:last-child`;
    const parents = [...new Set(els.map(el => el.parentElement))].filter(Boolean);
    // only the first parent is compared against the parsing boundary (<body>)
    if (parents.length > 0 && parents[0] !== boundary) {
      const parentTag = parents[0].tagName.toLowerCase();
      const allParentsFirst = parents.every(p => p.previousElementSibling === null);
      if (allParentsFirst) return `${parentTag}:first-child ${base}`;
      const allParentsLast = parents.every(p => p.nextElementSibling === null);
      if (allParentsLast) return `${parentTag}:last-child ${base}`;
      if (parents.length === 1 && parents[0].parentElement) {
        const idx = Array.from(parents[0].parentElement.children).indexOf(parents[0]) + 1;
        return `${parentTag}:nth-child(${idx}) ${base}`;
      }
    }
    return els.map(el => {
      if (!el.parentElement) return base;
      const idx = Array.from(el.parentElement.children).indexOf(el) + 1;
      return `${base}:nth-child(${idx})`;
    }).join(', ');
  }
} // end extractAndOptimizeStyles()
/**
 * Parses flat CSS text (no nested at-rules) into a plain object of the form
 * `{selector: {property: value}}`. Block comments are stripped first.
 *
 * - Chunks without a selector (e.g. the whitespace after the final `}`) are
 *   ignored instead of producing a rule named "".
 * - Declarations without a property name are ignored instead of producing a
 *   property named "".
 * - A selector that appears more than once is merged into a single rule, with
 *   later declarations overriding earlier ones.
 *
 * @param {string} css - CSS source text.
 * @returns {Object<string, Object<string, string>>} Rules keyed by selector.
 */
function parseCSS(css) {
  const out = {};
  const chunks = css.replace(/\/\*[\w\W]*?\*\//g, "").split("}");
  for (const chunk of chunks) {
    const [rawSelector, body = ""] = chunk.split("{");
    const selector = rawSelector.trim();
    if (!selector) continue;
    // merge into an existing rule rather than replacing it
    const declarations = out[selector] || (out[selector] = {});
    for (const declaration of body.split(/\s*;\s*/)) {
      const parts = declaration.split(":");
      const prop = parts[0].trim();
      if (!prop) continue;
      // re-join so values that contain ":" (urls, etc.) stay intact
      declarations[prop] = parts.slice(1).join(":").trim();
    }
  }
  return out;
} // end parseCSS()
/**
 * Serializes a `{selector: {property: value}}` object back into CSS text,
 * one tab-indented declaration per line.
 *
 * @param {Object<string, Object<string, string>>} rules - Rules keyed by selector.
 * @returns {string} The CSS text, trimmed of surrounding whitespace.
 */
function rules2css(rules) {
  const blocks = [];
  for (const [selector, declarations] of Object.entries(rules)) {
    const lines = [];
    for (const [prop, val] of Object.entries(declarations)) {
      lines.push(`${prop}: ${val}`);
    }
    blocks.push(selector + " {\n\t" + lines.join(";\n\t") + ";\n}");
  }
  return blocks.join("\n").trim();
}
/**
 * Removes, in place, every rule that has no declarations left.
 *
 * @param {Object<string, Object<string, string>>} rules - Rules keyed by selector (mutated).
 * @returns {undefined}
 */
function deleteEmptyRules(rules) {
  // Object.keys() returns a snapshot, so deleting while looping is safe
  for (const selector of Object.keys(rules)) {
    const declarationCount = Object.keys(rules[selector]).length;
    if (declarationCount < 1) {
      delete rules[selector];
    }
  }
}
/**
 * Hoists declarations shared by two or more rules into combined-selector rules,
 * e.g. `td {a:1; b:2}` and `th {a:1; c:3}` become `td {b:2}`, `th {c:3}` and
 * `td, th {a:1}`. Mutates `objRules` in place.
 *
 * Pairs are tracked per (property, value), so one property with several
 * different shared values is combined for each value. The rule data is
 * inspected directly; nothing is evaluated as code.
 *
 * Note: combined rules are added after the remaining rules, so the source
 * order of the hoisted declarations changes.
 *
 * @param {Object<string, Object<string, string>>} objRules - Rules keyed by selector (mutated).
 * @returns {undefined}
 */
function makeCombinedSelectorsIfNeeded(objRules) {
  // Index every (property, value) pair -> the selectors that declare it.
  // The key is the pair's JSON text, which keeps processing order stable and
  // unambiguous for any property/value content.
  const pairs = new Map();
  for (const [sel, decl] of Object.entries(objRules)) {
    for (const [prop, val] of Object.entries(decl)) {
      const key = `${JSON.stringify(prop)}: ${JSON.stringify(val)}`;
      let pair = pairs.get(key);
      if (!pair) {
        pair = { prop, val, sels: [] };
        pairs.set(key, pair);
      }
      pair.sels.push(sel);
    }
  }

  // keep only pairs declared by at least two rules, in sorted key order
  const shared = [...pairs.keys()]
    .sort()
    .map((key) => pairs.get(key))
    .filter((pair) => pair.sels.length >= 2);

  // Group shared pairs by the exact list of selectors using them, and take
  // the declaration out of each contributing rule.
  const pools = new Map();
  for (const { prop, val, sels } of shared) {
    const poolKey = sels.join(", ");
    if (!pools.has(poolKey)) pools.set(poolKey, []);
    pools.get(poolKey).push([prop, val]);
    for (const sel of sels) delete objRules[sel][prop];
  }

  deleteEmptyRules(objRules);

  // inject newly created rules containing common dupes; extend (rather than
  // overwrite) a rule that already uses the same combined selector
  for (const [sel, decls] of pools) {
    const target = objRules[sel] || (objRules[sel] = {});
    for (const [prop, val] of decls) target[prop] = val;
  }
} // end makeCombinedSelectorsIfNeeded()
/**
 * Within each rule, replaces occurrences of a custom property's value in the
 * rule's other declarations (including other custom properties) with a
 * `var(--name)` reference. Mutates `rules` in place and removes nothing.
 *
 * Only whole tokens are replaced - the value must be delimited by the start or
 * end of the string, whitespace, a comma or a parenthesis - so `--w: 1px` does
 * not turn `11px` into `1var(--w)`. Custom properties with an empty value are
 * skipped, because an empty search string matches everywhere.
 *
 * @param {Object<string, Object<string, string>>} rules - Rules keyed by selector (mutated).
 * @returns {undefined}
 */
function reEmbedVars(rules) {
  for (const decl of Object.values(rules)) {
    const props = Object.keys(decl);
    for (const varName of props.filter((name) => name.startsWith("--"))) {
      // read the current value: an earlier substitution may already have rewritten it
      const varValue = decl[varName];
      if (!varValue) continue;
      const escaped = varValue.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
      const wholeToken = new RegExp(`(?<=^|[\\s,()])${escaped}(?=[\\s,()]|$)`, "g");
      const reference = `var(${varName})`;
      for (const other of props) {
        if (other === varName) continue;
        // function replacer: keeps "$" sequences in the name from being interpreted
        decl[other] = decl[other].replace(wholeToken, () => reference);
      }
    }
  }
} // end reEmbedVars()
/**
 * For every rule whose selector contains at least two dots (e.g. `.big.red`),
 * removes the declarations that a single-class rule for one of its parts
 * (`.big` or `.red`) already declares with the same value. Mutates `rules`
 * in place.
 *
 * The selector is split on dots only. The previous pattern also swallowed a
 * leading "x" of each class name, so `.xfoo.bar` was compared against `.foo`
 * instead of `.xfoo`.
 *
 * @param {Object<string, Object<string, string>>} rules - Rules keyed by selector (mutated).
 * @returns {undefined}
 */
function stripRedundantClasses(rules) {
  for (const [selector, decl] of Object.entries(rules)) {
    // fewer than two dots: not a multi-class selector
    if (selector.split(".").length <= 2) continue;
    const parts = selector.split(/\s*\.\s*/).filter(String);
    for (const part of parts) {
      const base = rules["." + part];
      if (!base) continue;
      // we have a base class: drop declarations it already provides identically
      for (const prop of Object.keys(decl)) {
        if (base[prop] === decl[prop]) delete decl[prop];
      }
    }
  }
} //end stripRedundantClasses()
/**
 * Deletes custom properties (`--name`) that no declaration value references
 * through `var(--name)` or `var(--name, fallback)`. Mutates `rules` in place.
 *
 * Sweeps repeat until nothing more is removed, so a property that was only
 * referenced by another removed property is removed as well. References are
 * matched on the exact name, so `--a` is not kept alive by `var(--ab)`, and a
 * reference with a fallback counts as a use.
 *
 * @param {Object<string, Object<string, string>>} rules - Rules keyed by selector (mutated).
 * @returns {undefined}
 */
function removeSingleVars(rules) {
  const escapeRegExp = (text) => text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const isCustomProp = (name) => name.startsWith("--");

  // true when any declaration value in any rule contains var(<name>) or var(<name>, ...)
  const isReferenced = (name) => {
    const reference = new RegExp(`var\\(\\s*${escapeRegExp(name)}\\s*[,)]`);
    return Object.values(rules).some((decl) =>
      Object.values(decl).some((value) => reference.test(String(value)))
    );
  };

  let removedAny;
  do {
    removedAny = false;
    for (const decl of Object.values(rules)) {
      for (const name of Object.keys(decl).filter(isCustomProp)) {
        if (!isReferenced(name)) {
          delete decl[name];
          // a removal can orphan another variable, so sweep again
          removedAny = true;
        }
      }
    }
  } while (removedAny);
} // end removeSingleVars()
} //end deriveCSS()
//example usage: myDiv.innerHTML = deriveCSS( strCrappyHTML );
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment