Skip to content

Instantly share code, notes, and snippets.

@captainbrosset
Created July 9, 2020 15:24
Show Gist options
  • Save captainbrosset/301ef73a4a6552e0215ec9d50f69d615 to your computer and use it in GitHub Desktop.
Save captainbrosset/301ef73a4a6552e0215ec9d50f69d615 to your computer and use it in GitHub Desktop.
css grid website scraping
column-gap: 0.375rem
column-gap: 1rem
column-gap: 2rem
column-gap: 2vw
column-gap: 35px
column-gap: 5rem
column-gap:.625rem
column-gap:1.875rem
column-gap:10px
column-gap:10px
column-gap:1em
column-gap:2.8rem
column-gap:2em
column-gap:60px
display: grid
display: grid
display: grid
display: grid
display: grid
display: grid
display: grid
display: grid
display: grid
display: grid
display: grid
display: grid
display: grid
display:grid
display:grid
display:grid
display:grid
display:grid
display:grid
display:grid
display:grid
display:grid
display:grid
display:grid
display:grid
display:grid
display:grid
display:grid
display:grid
display:grid
display:grid
display:grid
display:grid
display:grid
display:grid
display:grid
display:grid
display:grid
gap: .75em
gap: 0 1.5em
gap: 0 2em
gap: 0.375rem
gap: 1rem
gap: 1rem
gap: 20px
gap: 2vw
gap: 35px
gap: var(--space-2)
gap:.625rem
gap:1.875rem
gap:10px
gap:10px
gap:12px
gap:12px
gap:15px
gap:16px
gap:16px
gap:16px 16px
gap:1em
gap:1rem
gap:1rem
gap:2.8rem
gap:20px
gap:20px
gap:2em
gap:2rem
gap:2rem
gap:60px
gap:9px
gap:var(--gutter)
gap:var(--listSpacing) 32px
gap:var(--rowLayoutGap)
grid-area: article-info
grid-area: header
grid-area: intro
grid-area:1/1
grid-area:1/1/auto/-1
grid-area:1/2
grid-area:2/2
grid-area:a
grid-area:a
grid-area:auto/span 2
grid-area:form
grid-area:main
grid-auto-flow: dense
grid-auto-flow:column
grid-auto-flow:column
grid-auto-flow:column
grid-auto-flow:dense
grid-auto-flow:row dense
grid-auto-rows: min-content
grid-auto-rows:auto
grid-column-end:27
grid-column-end:3
grid-column-end:span 1
grid-column-end:span 2
grid-column-gap: 0.375rem
grid-column-gap: 1rem
grid-column-gap: 2rem
grid-column-gap: 2vw
grid-column-gap: 35px
grid-column-gap: 5rem
grid-column-gap:.625rem
grid-column-gap:10px
grid-column-gap:1em
grid-column-gap:2.8rem
grid-column-gap:20px
grid-column-gap:2em
grid-column-gap:60px
grid-column-start:1
grid-column-start:1
grid-column-start:13
grid-column: 1 / -1
grid-column: 1 / 2
grid-column: 2
grid-column: 2
grid-column: 2 / span 12
grid-column: 3
grid-column: intro
grid-column: main
grid-column: span 2
grid-column: span 3
grid-column:1
grid-column:1
grid-column:1 / -1
grid-column:1/-1
grid-column:1/2
grid-column:1/3
grid-column:1/9
grid-column:auto/span 6
grid-column:m / m 4
grid-column:span 2
grid-column:span 2
grid-column:wrapper
grid-row-end: span 2
grid-row-end:span 1
grid-row-gap: 0.375rem
grid-row-gap:2.5rem
grid-row-gap:30px
grid-row-gap:6px
grid-row: 1
grid-row: 1 / -1
grid-row: 1 / 3
grid-row: 1 / 5
grid-row: 5
grid-row: content
grid-row: span 2
grid-row:1
grid-row:1/2
grid-row:1/2
grid-row:1/2
grid-row:1/2
grid-row:1/span 2
grid-row:span 2
grid-template-areas:
"header"
"nav"
"main"
"footer"
grid-template-areas:
"head"
"main"
"foot"
grid-template-areas: "article-table-of-contents article-main article-head" "article-table-of-contents article-additional-info article-head"
grid-template-areas:". . . . . . a" ". . . . . . a" ". . . . . b b" ". . . c c c d" ". . e e f f g"
grid-template-areas:"a b"
grid-template-areas:"socials" "form" "siteinfo"
grid-template-areas:'document-toc-container' 'main' 'side'
grid-template-columns: 1fr
grid-template-columns: repeat(2,1fr)
grid-template-columns: 190px auto
grid-template-columns: 1fr
grid-template-columns: 1fr 1fr
grid-template-columns: 1fr auto 1fr
grid-template-columns: 1fr max-content 1fr
grid-template-columns: 1fr min-content
grid-template-columns: [edge-start] minmax(0.5rem, 1fr) [content-start] repeat(12, minmax(1rem, 5rem)) [content-end] minmax(0.5rem, 1fr) [edge-end]
grid-template-columns: auto
grid-template-columns: auto auto auto
grid-template-columns: repeat(auto-fill, minmax(9rem, 1fr))
grid-template-columns:0 [wrapper-start] repeat(24,1fr) [wrapper-end] 0
grid-template-columns:1.75fr 1fr
grid-template-columns:112px 141px 58px 38px
grid-template-columns:180px calc(100% - 540px) 320px
grid-template-columns:1fr 1fr
grid-template-columns:1fr 1fr
grid-template-columns:1fr 1fr
grid-template-columns:40px 2fr 80px
grid-template-columns:75px 1fr
grid-template-columns:[max] 1fr [l] 1fr [m] repeat(3, [m] 28vw) [m] 1fr [l] 1fr [max]
grid-template-columns:auto 1fr
grid-template-columns:calc(3vw + 28px) 1fr 6vw 1vw 5vw 2vw 12vw
grid-template-columns:calc(50% - 4px) calc(50% - 4px)
grid-template-columns:minmax(auto,720px) auto
grid-template-columns:repeat(2,minmax(0,1fr))
grid-template-columns:repeat(3, 1fr)
grid-template-columns:repeat(3, 33%)
grid-template-columns:repeat(4,1fr)
grid-template-columns:repeat(6,1fr)
grid-template-columns:repeat(auto-fill, minmax(250px, 1fr))
grid-template-rows: 10vh 1fr 20vh
grid-template-rows: [nav] 3rem [content] auto [doormat] 17rem [end]
grid-template-rows: auto
grid-template-rows: auto
grid-template-rows: auto 1fr auto
grid-template-rows: repeat(2,1fr)
grid-template-rows: repeat(4, auto)
grid-template-rows: repeat(7,min-content) 1fr repeat(2,min-content)
grid-template-rows:107px 87px 125px 185px
grid-template-rows:1fr
grid-template-rows:1fr
grid-template-rows:1fr 20em
grid-template-rows:25vh auto
grid-template-rows:auto
grid-template-rows:auto
grid-template-rows:auto 1fr auto
grid-template-rows:auto auto auto auto
grid-template-rows:auto minmax(0,1fr)
grid-template-rows:calc(3vw + 28px) 1fr 15vh 15vh 15vh
grid-template-rows:repeat(var(--state-rows), 1fr)
grid:after,.mashup .grid:before{content:" "
grid:auto-flow 28px/repeat(2,1fr)
grid:auto-flow 34px/repeat(3,1fr)
grid:auto/152px 1fr
grid:auto/54px 1fr auto
grid:auto/minmax(0,1fr)
grid:auto/repeat(2,1fr)
grid:auto/repeat(var(--columnCount),1fr)
grid:nth-child(2n){margin-right:0
row-gap: 0.375rem
row-gap:2.5rem
row-gap:20px
row-gap:30px
row-gap:6px
row-gap:var(--rowLayoutGap)
// Opt the whole script into strict mode.
"use strict";
// Headless-browser driver used to load each page and observe its network responses.
const puppeteer = require('puppeteer');
// Node's file-system module, used to write the results to disk.
const fs = require('fs');
// Path (relative to the current working directory) of the file the extracted
// grid declarations are written to, one per line.
const FILE_NAME = 'data.txt';
/**
 * Pages to crawl for stylesheets. Each entry is passed as-is to
 * `page.goto()`, so entries must be clean absolute URLs with no surrounding
 * whitespace.
 */
const URLS_WITH_GRIDS = [
  "http://bryanlrobinson.com/",
  "http://www.duetshop.co.uk/",
  "https://www.greecemedtravel.com.au/",
  "http://gridbyexample.com/",
  "http://jeremyfuksa.com/",
  "https://julian.is/",
  "http://meyerweb.com/",
  "https://www.niagarafallsusa.com/",
  "http://oddbird.net/",
  "https://orangefla.me/",
  "https://samg.info/",
  "https://simpleicons.org/",
  "https://summit.microsoftedge.com/",
  "https://ttimsmith.com/",
  "https://toddl.dev/",
  "https://valentinog.com/",
  "https://www.wismutlabs.com/",
  "https://www.wbsl.com/studios/",
  // NOTE(review): same document as the summit.microsoftedge.com entry above
  // (only the fragment differs), so its stylesheets are likely counted twice.
  // Kept to preserve the original data set; confirm whether this is intended.
  "https://summit.microsoftedge.com/#schedule",
  "https://www.nytimes.com/",
  "https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Nullish_coalescing_operator",
  "https://codepen.io/",
  "https://www.g-ba.de/",
  "https://hey.com/",
  "https://www.netlify.com/",
  "https://2020.sofaconf.com/",
  "https://stripe.com/en-fr",
];
/**
 * Checks whether a stylesheet declares a grid container anywhere.
 *
 * @param {string} text - Raw CSS source of a stylesheet.
 * @returns {RegExpMatchArray|null} The first `display: grid` /
 *   `display: inline-grid` match (truthy), or `null` when there is none.
 */
function isStylesheetInteresting(text) {
  const gridDisplayPattern = /display\s*:\s*(grid|inline-grid)/;
  const found = text.match(gridDisplayPattern);
  return found;
}
/**
 * Loads `url` in a new page and collects the text of every stylesheet
 * response that `isStylesheetInteresting` accepts.
 *
 * Navigation and per-response failures are logged and otherwise ignored, so
 * one broken page or response does not abort the crawl.
 *
 * @param {object} browser - Browser handle exposing `newPage()`.
 * @param {string} url - Address of the page to load.
 * @returns {Promise<string[]>} Text of the matching stylesheets, in the order
 *   their bodies finished downloading.
 */
async function getPageStylesheets(browser, url) {
  const page = await browser.newPage();
  const stylesheets = [];
  // Response handlers run asynchronously; keep their promises so we can wait
  // for them before handing the array back to the caller.
  const pending = [];

  const collect = async (response) => {
    if (response.request().resourceType() !== 'stylesheet') {
      return;
    }
    const text = await response.text();
    if (isStylesheetInteresting(text)) {
      console.log(`>>> Found a stylesheet at ${response.url()}`);
      stylesheets.push(text);
    }
  };

  page.on('response', (response) => {
    pending.push(
      collect(response).catch((err) => {
        console.warn(`>>> Could not read a response from ${url}: ${String(err)}`);
      })
    );
  });

  try {
    await page.goto(url);
  } catch (err) {
    console.warn(`>>> Failed to load ${url}: ${String(err)}`);
  } finally {
    // Without this wait, bodies still downloading when goto() settles would
    // be pushed after the caller has already copied the array.
    await Promise.all(pending);
    try {
      // Release the tab; otherwise every crawled URL leaves a page open.
      await page.close();
    } catch (err) {
      console.warn(`>>> Failed to close the page for ${url}: ${String(err)}`);
    }
  }

  return stylesheets;
}
/**
 * Visits every address in `URLS_WITH_GRIDS`, one after the other, and gathers
 * the stylesheets that `getPageStylesheets` reports for each.
 *
 * @param {object} browser - Browser handle forwarded to `getPageStylesheets`.
 * @returns {Promise<string[]>} All collected stylesheet texts, grouped in
 *   crawl order.
 */
async function getStylesheetsFromAllPages(browser) {
  const collected = [];
  for (let i = 0; i < URLS_WITH_GRIDS.length; i++) {
    const pageUrl = URLS_WITH_GRIDS[i];
    console.log(`>> Crawling ${pageUrl}`);
    const pageSheets = await getPageStylesheets(browser, pageUrl);
    for (const sheet of pageSheets) {
      collected.push(sheet);
    }
  }
  return collected;
}
/**
 * Global regexes, one per grid-related CSS declaration of interest. Each one
 * matches the property name, the colon, and the value up to (not including)
 * the next `;` or `}`.
 *
 * The property patterns carry a negative lookbehind so the name must start at
 * a token boundary. Without it, `gap` also matched inside `row-gap` and
 * `grid-column-gap`, and `grid:` matched class selectors such as
 * `.grid:after`.
 */
const GRID_PROPERTIES_RE = [
  /display\s*:\s*(grid|inline-grid)/g,
  ...[
    'grid',
    'grid-template',
    'grid-template-rows',
    'grid-template-columns',
    'grid-template-areas',
    'grid-area',
    'grid-column',
    'grid-row',
    'grid-column-start',
    'grid-column-end',
    'grid-row-start',
    'grid-row-end',
    'grid-auto-rows',
    'grid-auto-columns',
    'grid-auto-flow',
    'grid-row-gap',
    'grid-column-gap',
    'row-gap',
    'column-gap',
    'gap',
    // Reject names preceded by a word character, `-` (longer or vendor-prefixed
    // property, custom property), `.` (class selector) or `#` (id selector).
  ].map((name) => new RegExp(`(?<![\\w.#-])${name}\\s*:[^;}]*`, 'g')),
];
/**
 * Extracts every grid-related declaration from a stylesheet.
 *
 * For each pattern in `GRID_PROPERTIES_RE`, all occurrences in `text` are
 * collected (not just the first), so a property declared in several rules is
 * reported once per declaration.
 *
 * @param {string} text - Raw CSS source of a stylesheet.
 * @returns {string[]} Trimmed `property: value` strings, grouped by pattern in
 *   `GRID_PROPERTIES_RE` order; empty when nothing matches.
 */
function extractGridProperties(text) {
  const props = [];
  for (const re of GRID_PROPERTIES_RE) {
    const matches = text.match(re);
    if (!matches) {
      continue;
    }
    // A non-global regex returns [fullMatch, ...captureGroups]; only the full
    // match is a declaration, so keep just that one in that case.
    const declarations = re.global ? matches : matches.slice(0, 1);
    for (const declaration of declarations) {
      props.push(declaration.trim());
    }
  }
  return props;
}
// Entry point: crawl every configured page, extract the grid declarations
// from the stylesheets found, and write them sorted, one per line, to
// FILE_NAME.
(async () => {
  const browser = await puppeteer.launch();
  let stylesheets;
  try {
    stylesheets = await getStylesheetsFromAllPages(browser);
  } finally {
    // Always shut the browser down, even when crawling throws, so no
    // headless browser process is left behind.
    await browser.close();
  }

  const data = stylesheets
    .flatMap((sheet) => extractGridProperties(sheet))
    .sort();

  // A single truncating write replaces the previous unlink + per-line append:
  // unlinkSync threw ENOENT when the file did not exist yet (e.g. first run).
  // Note: when nothing was extracted this leaves an empty file.
  const contents = data.map((line) => line + "\n").join("");
  fs.writeFileSync(FILE_NAME, contents);
})().catch((err) => {
  // Surface failures explicitly and exit with a non-zero status.
  console.error(err);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment