Skip to content

Instantly share code, notes, and snippets.

@hi-ogawa
Last active December 14, 2022 06:31
Show Gist options
  • Save hi-ogawa/fb938d0ba3000b5377f5203fdf9cb697 to your computer and use it in GitHub Desktop.
Save hi-ogawa/fb938d0ba3000b5377f5203fdf9cb697 to your computer and use it in GitHub Desktop.
playwright for scraping
import { chromium, Page } from "playwright-chromium";
import process from "node:process";
// this specific example is made to extract the list of spot trade coins on ByBit
// usage:
// node -r esbuild-register playwright-for-scraping.ts --headed
// note:
// "page.pause" with "--headed" allows the same debugging experience as playwright testing.
/** Labels of the tabs that are clicked, in order, inside the "Trade" popover. */
const TRADE_TABS = ["USDT", "BTC", "USDC", "DAI", "EUR"];

/**
 * Matches a double-quoted spot-trade URL and captures the URL itself
 * (without the surrounding quotes) in group 1.
 *
 * The dots of the host name are escaped so that they only match a literal
 * ".", not an arbitrary character. The "g" flag is required by `matchAll`.
 */
const TRADE_URL_RE = new RegExp(
  String.raw`"(https://www\.bybit\.com/en-US/trade/spot/\w+/\w+)"`,
  "g"
);
/**
 * Walks through every tab of the "Trade" popover on the ByBit landing page
 * and prints all spot-trade URLs found there as pretty-printed JSON.
 *
 * @param page Playwright page used for navigation and DOM access.
 */
async function run(page: Page) {
  await page.goto("https://www.bybit.com/en-US");

  // hovering the "Trade" entry of the header makes the popover appear
  const tradeMenu = page.locator("#HEADER-NAV >> text=Trade");
  await tradeMenu.hover();

  // locators are lazy, so this one can be shared by all iterations
  const popover = page.locator("#popover-root");
  const collected: string[] = [];

  for (const tabName of TRADE_TABS) {
    // switch the popover to the current tab
    const tabButton = page.locator(
      `#popover-root >> .tab-item >> text=${tabName}`
    );
    await tabButton.click();

    // grab the popover markup and pull the trade links out of it
    const markup = await popover.evaluate((element) => element.innerHTML);
    for (const url of extractTradeUrls(markup)) {
      collected.push(url);
    }
  }

  // print the collected URLs to stdout
  console.log(JSON.stringify(collected, null, 2));
}
/**
 * Collects every spot-trade URL that TRADE_URL_RE finds in the given markup.
 *
 * @param html Markup to search.
 * @returns The first capture group of each match, in order of appearance.
 */
function extractTradeUrls(html: string): string[] {
  const urls: string[] = [];
  for (const hit of html.matchAll(TRADE_URL_RE)) {
    urls.push(hit[1]);
  }
  return urls;
}
//
// main
//
/**
 * Launches Chromium, runs the scraper on a fresh page and closes the browser
 * afterwards, whether or not the scraper succeeded.
 *
 * The browser runs headless unless "--headed" is present in `process.argv`.
 */
async function main() {
  const browser = await chromium.launch({
    headless: !process.argv.includes("--headed"),
  });
  try {
    // The page is opened inside the try block so that a failure here
    // still reaches the finally clause and closes the launched browser.
    const page = await browser.newPage();
    await run(page);
  } finally {
    await browser.close();
  }
}
// Start the scraper only when this file is executed directly, not when it is
// loaded by another module.
if (require.main === module) {
  main().catch((error: unknown) => {
    // Handle the rejection explicitly instead of leaving a floating promise:
    // report the error and signal the failure through the exit code.
    console.error(error);
    process.exitCode = 1;
  });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment