Last active
March 23, 2018 23:37
-
-
Save tamlyn/cf5a09bc600adbc1387fc0f49ba07b96 to your computer and use it in GitHub Desktop.
Scrape statements from First Direct internet banking
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//////// CONFIG /////////

// your first direct user name
const username = ''
// your password (the one it asks for characters from)
const password = ''
// your memorable word
const memorable = ''
// set to false if you want to see the browser window as it runs
const headless = true

///////// END CONFIG //////////

// install puppeteer with npm
const puppeteer = require('puppeteer')
const fs = require('fs')

// Make sure the output directory for the statement files exists
if (!fs.existsSync('statements')) {
  fs.mkdirSync('statements')
}

// Single place to change how progress messages are reported
const log = msg => console.log(msg)

log('Launching browser')
// Pass the `headless` setting through to Puppeteer; previously the launch
// options hard-coded `true`, so changing the setting above had no effect.
const browserPromise = puppeteer.launch({ headless })
/**
 * Log in to First Direct internet banking, then fetch every statement listed
 * in the `#StatementDate` dropdown and write each one to
 * `statements/<year>-<month>-<day>.csv`.
 *
 * Reads the module-level `username`, `password` and `memorable` settings and
 * the shared `browserPromise`. The browser is closed once all statements have
 * been written; on failure the rejection propagates to the caller.
 *
 * @returns {Promise<void>} resolves after the last file is written
 * @throws {Error} if the "Log on without your Secure Key" link is missing,
 *   if fewer than three password positions can be read from the page, if the
 *   configured password has no character at a requested position, or if a
 *   statement date is not in `d/m/y` form
 */
async function run() {
  const browser = await browserPromise
  const page = await browser.newPage()

  // Start waiting for the navigation *before* clicking. Clicking first and
  // waiting afterwards can miss a navigation that completes quickly and then
  // hang until the timeout.
  const clickAndWait = target =>
    Promise.all([
      page.waitForNavigation(),
      typeof target === 'string' ? page.click(target) : target.click()
    ])

  // Quote a single CSV field, doubling embedded quotes so the row stays valid.
  // Missing cells become empty fields.
  const csvField = value =>
    `"${String(value == null ? '' : value).replace(/"/g, '""')}"`

  await page.goto('https://www2.firstdirect.com/1/2/pib-service')

  log('Logging in')

  // enter username
  await page.click('input[name=userid]')
  await page.keyboard.type(username)
  await clickAndWait('input[type=submit]')

  // switching to non-secure key login
  const links = await page.$x('//a[contains(text(), "Log on without your Secure Key")]')
  if (links.length === 0) {
    throw new Error('Could not find the "Log on without your Secure Key" link')
  }
  await clickAndWait(links[0])

  // read required characters from page
  // NOTE(review): assumes the second <p> on the page holds the prompt naming
  // the three requested password positions - confirm against the live page.
  const text = await page.evaluate(
    () => document.querySelectorAll('p')[1].innerHTML
  )
  const positions = text.match(/\d+|penultimate|Last/g) || []
  if (positions.length < 3) {
    throw new Error(`Expected 3 password positions on the page, found ${positions.length}`)
  }
  const characters = positions.slice(0, 3).map(position => {
    let index
    if (position === 'Last') index = password.length - 1
    else if (position === 'penultimate') index = password.length - 2
    else index = parseInt(position, 10) - 1 // page positions are 1-based

    const character = password.charAt(index)
    if (!character) {
      throw new Error(`Configured password has no character at position "${position}"`)
    }
    return character
  })

  // type password characters
  await page.type('#keyrcc_password_first', characters[0])
  await page.type('#keyrcc_password_second', characters[1])
  await page.type('#keyrcc_password_third', characters[2])
  // use the configured memorable word rather than a value baked into the code
  await page.type('#memorableAnswer', memorable)
  await clickAndWait('input[type=submit]')

  log('Loading statements')

  // navigate to statements
  // (presumably the first click only opens a menu; the second one navigates)
  await page.click('#link1')
  await clickAndWait('[id="link1,0"]')
  await clickAndWait('#vcpost8 a')

  // load list of available statements
  const dates = await page.evaluate(
    () => Array.from(document.querySelectorAll('#StatementDate option'))
      .map(el => el.innerText.trim())
  )
  // drop first value which is not a date
  dates.shift()

  // cycle through each statement
  for (const date of dates) {
    log(`Fetching ${date}`)

    // navigate to statement
    // NOTE(review): the option's visible text is passed as its value - this
    // assumes the two are identical in the dropdown markup.
    await page.select('#StatementDate', date)
    await clickAndWait('#StatementDate ~ a')

    // load table data
    const data = await page.evaluate(
      () => Array.from(document.querySelectorAll('tr[bgcolor]'))
        .map(tr => Array.from(tr.querySelectorAll('td')).map(td => td.innerText.trim()))
    )

    // rows with an empty first cell inherit the date of the row above
    let lastDate
    for (const row of data) {
      if (row[0]) lastDate = row[0]
      else row[0] = lastDate
    }

    // convert to CSV string
    const csv = data.map(row => row.map(csvField).join(',')).join('\n')

    // write to file, named year-month-day so files sort chronologically
    const parts = date.match(/(\d+)\/(\d+)\/(\d+)/)
    if (!parts) {
      throw new Error(`Unrecognised statement date: "${date}"`)
    }
    const [, day, month, year] = parts
    fs.writeFileSync(`statements/${year}-${month}-${day}.csv`, csv)
  }

  log('Finished')
  await browser.close()
}
// run it and catch errors
run().catch(err => {
  console.error(err)
  // report the failure to the calling shell instead of exiting with 0
  process.exitCode = 1
  // Best-effort cleanup. If launching the browser is what failed,
  // browserPromise rejects with the error already logged above, so only
  // report a *different* error raised while closing.
  return browserPromise
    .then(browser => browser.close())
    .catch(closeErr => {
      if (closeErr !== err) console.error('Failed to close browser:', closeErr)
    })
})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment