Last active
February 22, 2022 00:51
-
-
Save brandonb927/2341c99c9a6f5a52f4f8cd6eb64d027a to your computer and use it in GitHub Desktop.
A tiny self-contained node script to scrape the VS Code Marketplace website to get all the information about the packages you have installed in VS Code.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env node | |
// $ npm install -D fs-extra ora open puppeteer
const { spawn } = require('child_process') | |
const readline = require('readline') | |
const path = require('path') | |
const fse = require('fs-extra') | |
const open = require('open') | |
const puppeteer = require('puppeteer') | |
const ora = require('ora') | |
// Installed extensions as { name, url } records; filled in place, never reassigned.
const extensionList = []

// Scraped marketplace details, one record per extension; filled in place, never reassigned.
const extensionsDetailedList = []

// Environment variables are always strings, so values such as "false" or "0"
// would be truthy if used directly. Treat the usual "off" spellings (and an
// unset/empty variable) as disabled; anything else enables opening the page.
const OPEN_IN_BROWSER = !['', '0', 'false', 'no', 'off'].includes(
  String(process.env.OPEN_IN_BROWSER || '').trim().toLowerCase()
)

// Generated report and the JSON cache of scraped data, both under ./dist next to this script.
const htmlFilePath = path.resolve(__dirname, 'dist/index.html')
const extensionsListPath = path.resolve(__dirname, 'dist/extensions.json')
/**
 * Serialises the extension records to JSON and writes them to `extensionsListPath`.
 *
 * @param {Array<object>} extensions - Extension records to cache.
 * @returns {Promise} The promise returned by `fse.outputFile`, so callers can
 *   await the write or handle its failure instead of leaving it floating.
 */
const writeExtensionsFile = extensions =>
  fse.outputFile(extensionsListPath, JSON.stringify(extensions))
// Icon shown when an extension record carries no image URL.
const FALLBACK_ICON_URL = 'https://seeklogo.com/images/A/atom-logo-19BD90FF87-seeklogo.com.png'

// Escapes the HTML-significant characters so scraped text cannot inject markup
// or break out of an attribute value. null/undefined render as an empty string.
const escapeHtml = value =>
  String(value == null ? '' : value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;')

// Case-insensitive ordering by title; a missing title sorts as an empty string.
const compareByTitle = (a, b) => {
  const textA = String(a.title == null ? '' : a.title).toUpperCase()
  const textB = String(b.title == null ? '' : b.title).toUpperCase()
  if (textA < textB) return -1
  if (textA > textB) return 1
  return 0
}

// Renders one extension record as a row of the report.
const renderExtension = ext => `<div class="pv3 bb b--black-10 flex justify-between">
  <div class="flex items-center mw4 mr4 w-100">
    <img src="${escapeHtml(ext.img || FALLBACK_ICON_URL)}" class="db" />
  </div>
  <div class="w-100">
    <h2 class="mv0 f2 lh-copy">
      <a href="${escapeHtml(ext.url)}" class="link underline black hover-orange">${escapeHtml(ext.title)}</a>
    </h2>
    <h3 class="mv0 lh-copy">${escapeHtml(ext.name)}</h3>
    <p class="lh-copy">${escapeHtml(ext.desc)}</p>
  </div>
</div>`

/**
 * Builds the HTML report for the given extensions, writes it to `htmlFilePath`
 * and, when `OPEN_IN_BROWSER` is truthy, opens the written file.
 *
 * @param {Array<{title: string, name: string, desc: string, url: string, img?: string}>} extensions
 *   Extension records to list; the array is not modified.
 * @returns {Promise} Settles once the file is written (and `open` has been
 *   called, if enabled); rejects if either step fails.
 */
const writeHtmlFile = extensions => {
  // Sort a copy: Array.prototype.sort works in place and would reorder the caller's array.
  const rows = [...extensions].sort(compareByTitle).map(ext => renderExtension(ext)).join('')

  const content = `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="https://unpkg.com/tachyons/css/tachyons.min.css"/>
</head>
<body class="athelas mw7 ph3 center">
<div>
<h1 class="f-headline lh-solid">
${extensions.length} extensions installed!
</h1>
${rows}
<br />
<br />
</div>
<div class="pv4">
<p class="lh-copy tc">Generated at ${new Date().toISOString()}</p>
<p class="lh-copy tc">Made with &lt;3 by <a href="https://brandonb.ca">Brandon Brown</a></p>
</div>
</body>
</html>`

  return fse
    .outputFile(htmlFilePath, content)
    .then(() => (OPEN_IN_BROWSER ? open(htmlFilePath) : undefined))
}
// Delay before the spinner reports success. The write helpers are not
// guaranteed to return promises, so this still gives any un-awaited write a
// moment to finish before "All done!" is printed.
const SUCCESS_DELAY_MS = 1000

/**
 * Runs `code --list-extensions` and collects the non-empty lines it prints.
 *
 * @returns {Promise<string[]>} Extension identifiers, one per output line.
 *   Rejects if the process cannot be spawned or exits with a non-zero code.
 */
const listInstalledExtensions = () => new Promise((resolve, reject) => {
  const names = []
  const child = spawn('code', ['--list-extensions'])

  // Without an 'error' listener a spawn failure (e.g. `code` not on PATH)
  // is thrown as an uncaught exception.
  child.on('error', reject)

  readline
    .createInterface({ input: child.stdout, terminal: false })
    .on('line', line => {
      const name = line.trim()
      // Skip blank lines so they do not become extensions with an empty name.
      if (name) names.push(name)
    })

  // 'close' is emitted after the process has ended and its stdio streams have closed.
  child.on('close', code => {
    if (code === 0) {
      resolve(names)
    } else {
      reject(new Error(`\`code --list-extensions\` exited with code ${code}`))
    }
  })
})

/**
 * Opens the extension's marketplace page in a new tab and reads its details.
 *
 * @param {object} browser - Puppeteer browser returned by `puppeteer.launch()`.
 * @param {{name: string, url: string}} extension - Extension to look up.
 * @returns {Promise<{title: string, desc: string, img: (string|null), name: string, url: string}>}
 */
const scrapeExtension = async (browser, extension) => {
  const page = await browser.newPage()
  try {
    await page.goto(extension.url, {
      waitUntil: 'networkidle0',
      timeout: 0 // NOTE(review): 0 disables the navigation timeout, so a stalled page waits forever
    })
    // The callback runs inside the page, so it can only use what is passed in as its argument.
    return await page.evaluate(({ url, name }) => {
      const title = document.querySelector('.item-header .ux-item-name').innerText
      const desc = document.querySelector('.item-header .ux-item-shortdesc').innerText
      // The icon is optional: the HTML writer substitutes a fallback image when img is falsy.
      const icon = document.querySelector('.item-img .image-display')
      const img = icon ? icon.getAttribute('src') : null
      return {
        title,
        desc,
        img,
        name,
        url
      }
    }, extension)
  } finally {
    // Close the tab even when scraping fails so tabs do not pile up in the browser.
    await page.close()
  }
}

// Entry point: rebuild the page from the cached JSON when it exists, otherwise
// list the installed extensions, scrape each marketplace page, then write the
// JSON cache and the HTML page.
;(async () => {
  if (await fse.pathExists(extensionsListPath)) {
    const spinner = ora('Reading cached extensions').start()
    try {
      const extensionsJson = await fse.readJson(extensionsListPath)
      await writeHtmlFile(extensionsJson)
    } catch (err) {
      spinner.fail('Could not rebuild the page from the cached extensions')
      throw err
    }
    setTimeout(() => {
      spinner.succeed('All done! Please refresh your browser to view the rebuilt page.')
    }, SUCCESS_DELAY_MS)
    return
  }

  for (const name of await listInstalledExtensions()) {
    extensionList.push({
      name,
      url: `https://marketplace.visualstudio.com/items?itemName=${encodeURIComponent(name)}`
    })
  }

  const spinner = ora('Scraping extension info from the marketplace').start()
  try {
    const browser = await puppeteer.launch() // For debugging {headless: false}
    try {
      // Pages are scraped one at a time, in the order the extensions were listed.
      for (const extension of extensionList) {
        const data = await scrapeExtension(browser, extension)
        extensionsDetailedList.push(data)
        spinner.text = `Received extension: ${data.name}`
      }
    } finally {
      // Always shut the browser down, otherwise a failed scrape leaves it running.
      await browser.close()
    }
    await Promise.all([
      writeExtensionsFile(extensionsDetailedList),
      writeHtmlFile(extensionsDetailedList)
    ])
  } catch (err) {
    spinner.fail('Scraping the marketplace failed')
    throw err
  }

  setTimeout(() => {
    spinner.succeed('All done!')
  }, SUCCESS_DELAY_MS)
})().catch(err => {
  // Report the failure and signal it through the exit code instead of an unhandled rejection.
  console.error(err)
  process.exitCode = 1
})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment