Last active
December 14, 2022 06:31
-
-
Save hi-ogawa/fb938d0ba3000b5377f5203fdf9cb697 to your computer and use it in GitHub Desktop.
playwright for scraping
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { chromium, Page } from "playwright-chromium"; | |
import process from "node:process"; | |
// This specific example extracts the list of spot trade coins on ByBit.
// usage:
//   node -r esbuild-register playwright-for-scraping.ts --headed
// note:
//   "page.pause" with "--headed" allows the same debugging experience as playwright testing.
/** Labels of the tabs that are clicked, one after another, inside the "Trade" popover. */
const TRADE_TABS = ["USDT", "BTC", "USDC", "DAI", "EUR"];

/**
 * Matches a double-quoted spot-trade URL (e.g. inside an `href="..."` attribute).
 * Capture group 1 is the URL without the surrounding quotes.
 *
 * The dots of the host name are escaped so that only the literal
 * "www.bybit.com" matches. The "g" flag is required by `String.prototype.matchAll`.
 */
const TRADE_URL_RE = new RegExp(
  String.raw`"(https://www\.bybit\.com/en-US/trade/spot/\w+/\w+)"`,
  "g"
);
/**
 * Scrapes the spot-trade links from the "Trade" popover of the ByBit home page
 * and prints them to stdout as a pretty-printed JSON array.
 *
 * Each tab listed in TRADE_TABS is clicked in turn and the popover's HTML is
 * scanned for trade URLs after every click.
 *
 * @param page - Playwright page used for navigation and interaction.
 */
async function run(page: Page): Promise<void> {
  await page.goto("https://www.bybit.com/en-US");

  // reveal "Trade" popover in header
  await page.locator("#HEADER-NAV >> text=Trade").hover();

  const popover = page.locator("#popover-root");

  // A Set drops URLs seen more than once while keeping first-seen order.
  const urls = new Set<string>();
  for (const tab of TRADE_TABS) {
    // click tab inside the popover
    await page.locator(`#popover-root >> .tab-item >> text=${tab}`).click();

    // extract all the links currently rendered in the popover
    const html = await popover.innerHTML();
    for (const url of extractTradeUrls(html)) {
      urls.add(url);
    }
  }

  // print the result to stdout (redirect it to a file if needed)
  console.log(JSON.stringify(Array.from(urls), null, 2));
}
/**
 * Collects every trade URL found in an HTML string.
 *
 * @param html - Markup to scan.
 * @param pattern - Regular expression whose first capture group is the URL.
 *   Defaults to TRADE_URL_RE. It must carry the "g" flag, because
 *   `String.prototype.matchAll` throws a TypeError for non-global patterns.
 * @returns The captured URLs in order of appearance; duplicates are kept.
 */
function extractTradeUrls(html: string, pattern: RegExp = TRADE_URL_RE): string[] {
  const urls: string[] = [];
  for (const match of html.matchAll(pattern)) {
    const url = match[1];
    // A capture group that did not participate in the match is undefined; skip it
    // so the result really is a string[].
    if (url !== undefined) {
      urls.push(url);
    }
  }
  return urls;
}
//
// main
//
/**
 * Launches Chromium, runs the scraper on a fresh page, and always closes the
 * browser afterwards.
 *
 * The browser is headless unless "--headed" is present in the command-line
 * arguments.
 */
async function main(): Promise<void> {
  const browser = await chromium.launch({
    headless: !process.argv.includes("--headed"),
  });
  try {
    // Page creation is inside the try so the browser is closed even if it fails.
    const page = await browser.newPage();
    await run(page);
  } finally {
    await browser.close();
  }
}
// Run the scraper only when this file is executed directly, not when it is imported.
if (require.main === module) {
  // Handle the rejection explicitly instead of leaving a floating promise:
  // log the error and signal failure through the exit code.
  main().catch((error: unknown) => {
    console.error(error);
    process.exitCode = 1;
  });
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment