Created
June 21, 2022 00:20
-
-
Save wolfiex/8b86721e6889449467afd61d8f1e08f9 to your computer and use it in GitHub Desktop.
Extract CSS from a URL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# headless browser
import re

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
# Page to scrape.  This is a placeholder: driver.get() expects a fully
# qualified URL including the scheme (e.g. 'https://example.com').
url = 'myurl.com'
# CSS selector whose subtree decides which style rules are kept; it is also
# used as the stem of the output file name.
querySelect = 'section'

# Run Chrome without opening a visible window.
chrome_options = Options()
chrome_options.add_argument("--headless")

# Selenium 4 receives the chromedriver binary through a Service object;
# passing the path as the first positional argument was deprecated in 4.0
# and removed in 4.10.
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=chrome_options,
)
# JavaScript executed inside the loaded page.  It gathers the plain style
# rules of every same-origin stylesheet into an object mapping each rule's
# selectorText to the concatenated declaration text of all rules sharing
# that selector, and returns it to Python via execute_script().
js = '''
// Sheets without an href (inline <style> blocks) count as same-domain;
// linked sheets must be served from the page's own origin.
const isSameDomain = (styleSheet) => {
  if (!styleSheet.href) {
    return true;
  }
  return styleSheet.href.indexOf(window.location.origin) === 0;
};

// Rule type 1 is a plain style rule (selector + declarations).
const isStyleRule = (rule) => rule.type === 1;

// Flatten the style rules of all same-domain sheets into one array.
const getCSSCustomPropIndex = () =>
  [...document.styleSheets]
    .filter(isSameDomain)
    .reduce(
      (finalArr, sheet) =>
        finalArr.concat([...sheet.cssRules].filter(isStyleRule)),
      []
    );

var css = {};
var stylesheet = getCSSCustomPropIndex();
// The fallback must be parenthesised: without the parentheses "+" binds
// tighter than "||", so a selector that was already present kept only its
// first rule and every later declaration block was silently dropped.
stylesheet.forEach((d) => {
  css[d.selectorText] = (css[d.selectorText] || "") + d.style.cssText;
});
return css;
'''
# Load the page, fetch the {selector: declarations} mapping produced by the
# injected script, and record every class, id and tag name found beneath
# the elements selected by querySelect.  Class names carry a '.' prefix and
# ids a '#' prefix; the selector itself is always included.
relevant = {querySelect}
try:
    driver.get(url)
    stylesheets = driver.execute_script(js)

    # starting with our selected elements
    # (find_elements_by_css_selector was removed in Selenium 4.3; the
    # By-based locator is the supported spelling.)
    for match in driver.find_elements(By.CSS_SELECTOR, querySelect):
        # lets look at each child
        for child in match.find_elements(By.CSS_SELECTOR, '*'):
            # A class attribute may list several names ("a b"); store each
            # one separately so they are not glued together later on.
            # get_attribute() may return None, hence the `or ''`.
            class_attr = child.get_attribute('class') or ''
            relevant.update('.' + name for name in class_attr.split())

            element_id = child.get_attribute('id')
            if element_id:
                relevant.add('#' + element_id)

            # tag_name gives the complete tag, including hyphenated
            # custom-element names that a \w+ regex would truncate.
            relevant.add(child.tag_name)
finally:
    # Always shut the browser down so no headless Chrome process lingers,
    # even when loading or scraping fails.
    driver.quit()

# De-duplicated, in a stable order.
relevant = sorted(relevant)
# Removes the selector parts the matcher ignores: '>' / '<' characters,
# pseudo-classes / pseudo-elements and attribute filters.  The pseudo part
# stops at a comma and the attribute part at the first ']' so that neither
# swallows the following selector of a comma-separated group.
cleancss = re.compile(r'>|<|:[^\s,]+|\[[^\]]*\]')

# Alternation of every bare class / id / tag name collected in `relevant`.
# Each entry is broken up on whitespace, '.', '#' and combinators so that
# compound entries (e.g. 'div.card' or a multi-class 'a b') contribute
# their individual names, and each name is escaped so characters such as
# '+' or '(' in a class name cannot corrupt the pattern.
cssmatch = re.compile('|'.join(sorted({
    re.escape(token)
    for entry in relevant
    for token in re.split(r'[\s.#>+~,]+', entry)
    if token
})))
def matched(x, pattern=None, cleaner=None):
    """Return True if selector *x* only uses names known to *pattern*.

    The selector is first stripped with *cleaner* and then split into its
    comma-separated groups.  A group passes when it contains at least one
    name and every name in it (tag, class or id, split on whitespace, '.',
    '#' and the combinators '<', '>', '+', '~') fully matches *pattern*.
    The selector as a whole passes as soon as one group passes.

    Args:
        x: The selector text to test.
        pattern: Compiled regex accepting the allowed names.  Defaults to
            the module-level ``cssmatch``.
        cleaner: Compiled regex whose matches are removed from *x* before
            it is split.  Defaults to the module-level ``cleancss``.

    Returns:
        bool: whether at least one selector group passed.
    """
    if pattern is None:
        pattern = cssmatch
    if cleaner is None:
        cleaner = cleancss

    selector = cleaner.sub('', x)

    # for each grouped selection
    for group in re.split(r' *, *', selector):
        # Splitting on the separators leaves empty strings at the edges
        # (e.g. '.card' -> ['', 'card']); drop them, otherwise any group
        # beginning with '.' or '#' could never match.
        names = [name for name in re.split(r'[\s.#<>+~]+', group) if name]
        # An empty group must not pass vacuously.
        if names and all(pattern.fullmatch(name) for name in names):
            return True
    return False
# Keep only the selectors accepted by matched(), in alphabetical order.
csskeys = sorted(filter(matched, stylesheets))

# Write one `selector{declarations}` rule per paragraph to '<selector>.css'.
# The encoding is fixed to UTF-8 so non-ASCII CSS (e.g. `content` strings)
# does not fail on platforms whose default encoding is not UTF-8.
with open(querySelect + '.css', 'w', encoding='utf-8') as f:
    f.writelines(
        f'{selector}{{{stylesheets[selector]}}}\n\n' for selector in csskeys
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment