Created
September 26, 2018 16:09
-
-
Save pfeilbr/5c2fb9141b8292b6b352063ac10ce1f7 to your computer and use it in GitHub Desktop.
Export a list of US banks as CSV
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(async () => {
  /**
   * Scrapes the paginated bank listing at bestcashcow.com and prints the
   * collected table rows as CSV.
   *
   * Relies on the global `fetch` and `DOMParser`, so it is meant to be run
   * where both exist (e.g. pasted into a browser console).
   */
  class Scraper {
    /**
     * Pause for a given amount of time.
     *
     * @param {number} ms - Delay in milliseconds.
     * @returns {Promise<void>} Resolves once the delay has elapsed.
     */
    sleep(ms) {
      return new Promise((resolve) => setTimeout(resolve, ms));
    }

    /**
     * Download a URL and return its body as text.
     *
     * @param {string} url - Address to fetch.
     * @returns {Promise<string>} The response body.
     * @throws {Error} When the server answers with a non-2xx status, so that
     *   an error page is never handed to the HTML parser as if it were data.
     */
    async fetchURLContent(url) {
      const resp = await fetch(url);
      if (!resp.ok) {
        throw new Error(`Request for ${url} failed with HTTP status ${resp.status}`);
      }
      return resp.text();
    }

    /**
     * Fetch one listing page and extract the cells of its
     * `.bank_savings_rates` table.
     *
     * @param {string} url - Page to fetch and parse.
     * @returns {Promise<string[][]>} One array per `tbody tr`, holding the
     *   trimmed text of each `td` in that row.
     * @throws {Error} When the page contains no `.bank_savings_rates` element.
     */
    async parseDataFromURL(url) {
      const parser = new DOMParser();
      const html = await this.fetchURLContent(url);
      const doc = parser.parseFromString(html, 'text/html');
      const table = doc.querySelector('.bank_savings_rates');
      if (table === null) {
        // Fail with a message that names the page instead of an opaque
        // "cannot read properties of null" TypeError.
        throw new Error(`No .bank_savings_rates table found at ${url}`);
      }
      return Array.from(table.querySelectorAll('tbody tr'), (row) =>
        Array.from(row.querySelectorAll('td'), (col) => col.innerText.trim()),
      );
    }

    /**
     * Serialize rows of fields as CSV text.
     *
     * Every field is wrapped in double quotes and any double quote inside a
     * field is doubled, so commas, quotes and line breaks in a cell do not
     * corrupt the output. Rows are separated by `\n`; there is no leading or
     * trailing line break.
     *
     * @param {Array<Array<*>>} items - Rows, each an array of field values.
     * @returns {string} The CSV text ('' for an empty list).
     */
    itemsToCsv(items) {
      const quoteField = (field) => `"${String(field).replace(/"/g, '""')}"`;
      return items.map((item) => item.map(quoteField).join(',')).join('\n');
    }

    /**
     * Walk every listing page in order, collect all rows and log them as CSV.
     *
     * The page count (368) and base URL are hard-coded. Pages are fetched one
     * at a time with a one-second pause after each, and the URL and row count
     * of every page are logged as progress. A failure on any page rejects the
     * returned promise and no CSV is printed.
     *
     * @returns {Promise<null>} Always resolves to `null`.
     */
    async run() {
      const allItems = [];
      const numberOfPages = 368;
      const baseURL = 'https://www.bestcashcow.com/banks/page-';
      for (let pageNumber = 1; pageNumber <= numberOfPages; pageNumber++) {
        const url = `${baseURL}${pageNumber}`;
        console.log(url);
        const items = await this.parseDataFromURL(url);
        // Append in place rather than rebuilding the whole array per page.
        allItems.push(...items);
        console.log(items.length);
        await this.sleep(1000);
      }
      console.log(this.itemsToCsv(allItems));
      return null;
    }
  }

  try {
    const scraper = new Scraper();
    await scraper.run();
  } catch (err) {
    // Report the failure explicitly instead of leaving a rejected promise
    // with no handler.
    console.error('Scrape aborted:', err);
  }
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment