Skip to content

Instantly share code, notes, and snippets.

@wolfiex
Created June 21, 2022 00:20
Show Gist options
  • Save wolfiex/8b86721e6889449467afd61d8f1e08f9 to your computer and use it in GitHub Desktop.
Save wolfiex/8b86721e6889449467afd61d8f1e08f9 to your computer and use it in GitHub Desktop.
Extract CSS from url
# headless browser
import re

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
# Page to load, and the CSS selector whose subtree decides which rules are kept.
# NOTE(review): 'myurl.com' is a placeholder; WebDriver presumably needs an
# absolute URL including the scheme (e.g. "https://...") -- confirm when editing.
url = 'myurl.com'
querySelect = 'section'

# Run Chrome without opening a visible window.
chrome_options = Options()
chrome_options.add_argument("--headless")

# Selenium 4 receives the chromedriver binary through a Service object; passing
# the path as the first positional argument was deprecated in 4.0 and no longer
# works in current releases.
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=chrome_options,
)
# JavaScript run inside the loaded page via execute_script.  It returns an
# object mapping each selector text to the concatenated declarations of every
# top-level style rule that uses that selector.
js = '''
// Accept inline sheets (no href) and sheets whose URL starts with the
// page's own origin.
const isSameDomain = (styleSheet) => {
  if (!styleSheet.href) {
    return true;
  }
  return styleSheet.href.indexOf(window.location.origin) === 0;
};

// Keep only rules whose type code is 1 (style rules).
const isStyleRule = (rule) => rule.type === 1;

// Flatten the style rules of all accepted stylesheets into a single array.
const getCSSCustomPropIndex = () =>
  [...document.styleSheets]
    .filter(isSameDomain)
    .reduce(
      (finalArr, sheet) =>
        finalArr.concat([...sheet.cssRules].filter(isStyleRule)),
      []
    );

var css = {};
var stylesheet = getCSSCustomPropIndex();
stylesheet.forEach((d) => {
  // The parentheses matter: without them "+" binds tighter than "||", so a
  // selector that was already seen would keep only its first rule.
  css[d.selectorText] = (css[d.selectorText] || '') + d.style.cssText;
});
return css;
'''
# Everything that needs the live browser happens inside the try block; the
# finally clause shuts the browser down even if scraping fails, so no headless
# Chrome process is left behind.
try:
    driver.get(url)
    # selector text -> concatenated declarations, as returned by the `js` snippet
    stylesheets = driver.execute_script(js)

    # Selectors considered relevant: the query itself plus, for every
    # descendant of each matched element, its classes ('.name'), its id
    # ('#name') and its tag name.
    relevant = {querySelect}
    for match in driver.find_elements(By.CSS_SELECTOR, querySelect):
        for child in match.find_elements(By.CSS_SELECTOR, '*'):
            # A class attribute may hold several space-separated names; record
            # each one separately instead of gluing them into one bogus name.
            class_attr = child.get_attribute('class') or ''
            relevant.update('.' + name for name in class_attr.split())
            element_id = child.get_attribute('id')
            if element_id:
                relevant.add('#' + element_id)
            # tag_name keeps hyphenated tags intact, unlike a \w+ regex over
            # the element's outerHTML.
            relevant.add(child.tag_name)
finally:
    driver.quit()

relevant = sorted(relevant)
# print(relevant)

# Strips combinators, pseudo-classes/elements and attribute tests from a
# selector.  The bracket part stops at the first ']' so that several attribute
# tests in one selector do not swallow the text between them.
cleancss = re.compile(r'>|<|:\S+|\[[^\]]*\]')

# Reduce every relevant entry to its bare names (prefixes and separators
# removed) and build one alternation that `matched` full-matches tokens against.
names = set()
for entry in relevant:
    names.update(part for part in re.split(r'[\s.#<>]+', entry) if part)
# re.escape keeps names containing regex metacharacters from corrupting the pattern.
cssmatch = re.compile('|'.join(re.escape(name) for name in sorted(names)))
def matched(x, pattern=None, cleaner=None):
    """Count the comma-separated groups of selector *x* made only of known names.

    The selector is first passed through *cleaner* (which removes the parts
    that pattern matches), then split on commas into groups.  Each group is
    split into tokens on whitespace, '.', '#', '<' and '>'.  A group counts
    when it has at least one token and every token full-matches *pattern*.

    Args:
        x: Selector text, e.g. ``"section > h1.title, .note"``.
        pattern: Compiled regex that tokens must full-match.  Defaults to the
            module-level ``cssmatch``.
        cleaner: Compiled regex whose matches are deleted from *x* before
            tokenising.  Defaults to the module-level ``cleancss``.

    Returns:
        The number of groups that passed (0 when none did), so the result can
        be used directly as a truth value, e.g. with ``filter``.
    """
    if pattern is None:
        pattern = cssmatch
    if cleaner is None:
        cleaner = cleancss

    cleaned = cleaner.sub('', x)
    passed = 0
    # for each grouped selection
    for group in re.split(r' *, *', cleaned):
        # A selector that starts with '.' or '#' splits into a leading empty
        # string; dropping empties lets plain class/id selectors match.
        tokens = [token for token in re.split(r'[\s.#<>]+', group) if token]
        # Require at least one token so a group that was cleaned away entirely
        # (e.g. ':root') is not accepted vacuously.
        if tokens and all(pattern.fullmatch(token) for token in tokens):
            passed += 1
    return passed
# Keep only the selectors accepted by `matched`, in sorted order.
csskeys = sorted(filter(matched, stylesheets))
# print(csskeys,cssmatch)

# Write one "selector{declarations}" entry per kept selector, each followed by
# a blank line.  The encoding is explicit so that non-ASCII characters in the
# CSS do not depend on the platform's default encoding.
with open(querySelect + '.css', 'w', encoding='utf-8') as f:
    f.writelines('%s{%s}\n\n' % (key, stylesheets[key]) for key in csskeys)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment