Skip to content

Instantly share code, notes, and snippets.

@pfeilbr
Created September 26, 2018 16:09
Show Gist options
  • Save pfeilbr/5c2fb9141b8292b6b352063ac10ce1f7 to your computer and use it in GitHub Desktop.
Exports a list of US banks as CSV, scraped from bestcashcow.com.
(async () => {
/**
 * Scrapes the paginated bank-listing tables from bestcashcow.com and
 * renders the collected rows as CSV. Intended for a browser context —
 * it relies on `fetch`, `DOMParser`, and `innerText`.
 */
class Scraper {
  /**
   * Resolve after `ms` milliseconds; used to throttle page requests.
   * @param {number} ms - delay in milliseconds
   * @returns {Promise<void>}
   */
  sleep(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Fetch a URL and return its body as text.
   * @param {string} url
   * @returns {Promise<string>} the response body
   * @throws {Error} when the HTTP response is not OK — the original
   *   silently parsed error pages, yielding empty or garbage rows.
   */
  async fetchURLContent(url) {
    const resp = await fetch(url);
    if (!resp.ok) {
      throw new Error(`Request failed for ${url}: ${resp.status} ${resp.statusText}`);
    }
    // Returning the promise directly; the extra `await` was redundant.
    return resp.text();
  }

  /**
   * Download one listing page and extract the bank table rows.
   * @param {string} url
   * @returns {Promise<string[][]>} one array of trimmed cell texts per
   *   `<tr>`; empty when the expected table is absent. (Guard added: the
   *   original crashed with a TypeError on pages lacking
   *   `.bank_savings_rates`.)
   */
  async parseDataFromURL(url) {
    const html = await this.fetchURLContent(url);
    const doc = new DOMParser().parseFromString(html, 'text/html');
    const table = doc.querySelector('.bank_savings_rates');
    if (!table) {
      return [];
    }
    const items = [];
    for (const row of table.querySelectorAll('tbody tr')) {
      const fields = [];
      for (const col of row.querySelectorAll('td')) {
        fields.push(col.innerText.trim());
      }
      items.push(fields);
    }
    return items;
  }

  /**
   * Render rows as CSV with RFC 4180-style quoting.
   * Fixes vs. original: embedded double quotes are escaped by doubling
   * them, and the output no longer starts with a spurious blank line.
   * @param {string[][]} items - rows of field strings
   * @returns {string} CSV text, one line per row ('' for no rows)
   */
  itemsToCsv(items) {
    return items
      .map((row) => row.map((f) => `"${String(f).replaceAll('"', '""')}"`).join(','))
      .join('\n');
  }

  /**
   * Scrape every listing page sequentially (throttled), log progress,
   * and print the final CSV.
   * @param {number} [numberOfPages=368] - pages to crawl (generalized
   *   from the original hard-coded constant)
   * @param {number} [delayMs=1000] - pause between requests, to be polite
   * @returns {Promise<string>} the CSV of all collected rows
   */
  async run(numberOfPages = 368, delayMs = 1000) {
    const baseURL = 'https://www.bestcashcow.com/banks/page-';
    let allItems = [];
    for (let pageNumber = 1; pageNumber <= numberOfPages; pageNumber++) {
      const url = `${baseURL}${pageNumber}`;
      console.log(url);
      const items = await this.parseDataFromURL(url);
      allItems = allItems.concat(items);
      console.log(items.length);
      await this.sleep(delayMs);
    }
    const csv = this.itemsToCsv(allItems);
    console.log(csv);
    return csv;
  }
}
// Entry point: crawl all pages and print the resulting CSV to the console.
const scraper = new Scraper();
await scraper.run();
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment