Skip to content

Instantly share code, notes, and snippets.

@simonmichael
Last active January 19, 2019 17:42
Show Gist options
  • Save simonmichael/3b32b660cc497a493da6b6ba04141b21 to your computer and use it in GitHub Desktop.
Save simonmichael/3b32b660cc497a493da6b6ba04141b21 to your computer and use it in GitHub Desktop.
A WIP Wells Fargo bank scraper in haskell that I couldn't get working reliably (2016). If you do, please let me know!
#!/usr/bin/env stack
{- stack exec
--verbosity info
--package webdriver
-- ghc
-}
--package data-default
-- stack exec ghci wellsfargo
-- stack exec ghc wellsfargohs
--
-- https://muehe.org/posts/loading-chase-transactions-into-ledger/
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE BangPatterns #-}
import Control.Concurrent
import Control.Exception
import Control.Monad
import Control.Monad.IO.Class
import Data.Aeson (FromJSON)
import qualified Data.ByteString.Lazy as B
-- import Data.Default
import Data.List
import Data.Maybe
import qualified Data.Text as T
import Data.Time
import System.Environment
import System.IO
import Test.WebDriver
import Test.WebDriver.Commands.Wait
import Test.WebDriver.JSON (ignoreReturn)
config =
-- useBrowser (Firefox
-- Nothing
-- def -- http://hackage.haskell.org/package/webdriver-0.8.4/docs/Test-WebDriver-Capabilities.html#t:LogLevel
-- Nothing
-- -- (Just "/Applications/Firefox.app/Contents/MacOS/firefox")
-- )
defaultConfig
{wdHistoryConfig=noHistory}
waitTime = 20
-- :m Test.WebDriver Test.WebDriver.Session
-- s <- runSession defaultConfig $ getSession
-- -- getSession from Test.WebDriver.Session simply returns the existing session stored in the state monad. Once you have a session, you can then run commands on it like this:
-- runWD s $ openPage "http://example.com"
-- -- runSession creates a new WDSession using the given WDConfig, whereas runWD takes an existing WDSession reference and runs commands with that
main = do
args <- getArgs
let !acctno = read $ head args
hPutStr stderr "Username: " >> hFlush stderr
u <- getLine
hPutStr stderr "Password: " >> hFlush stderr
p <- getLine
-- let (u,p) = ("X", "X")
iolog "starting browser.."
csv <- runSession config
-- . finallyClose
$ do
login (T.pack u) (T.pack p)
accountDetail acctno
accountDownload
downloadCsv
putStrLn $ chomp csv
-- --> login page --> account summary page
login u p = do
wdlog "loading login page.."
openPage "https://connect.secure.wellsfargo.com/auth/login/present"
findElem ( ByCSS "input[id=j_username]" ) >>= sendKeys u
findElem ( ByCSS "input[id=j_password]" ) >>= sendKeys p
findElem ( ByCSS "form[id=Signon]" ) >>= submit
-- account summary page --> account detail page
accountDetail :: Int -> WD ()
accountDetail n = do
wdlog "loading account summary page.."
accountlinks <- waitUntil waitTime $ do
es <- findElems (ByCSS "a.account-title-group")
-- wdlog $ "accountlinks: " ++ show es
expect $ length es == 5
return es
accountlinks' <-
if (length accountlinks < 5)
then do
wdlog $ "Warning: waiting for the account links failed: " ++ show accountlinks
waitUntil waitTime $
findElems (ByCSS "a.account-title-group") >>= expect . ((==5).length)
findElems (ByCSS "a.account-title-group")
else return accountlinks
-- wdlog $ "accountlinks': " ++ show accountlinks'
let l = accountlinks' !! (n-1)
Just u <- l `attr` "data-url"
wdlog $ "loading account detail page.." ++ T.unpack u
-- XXX sometimes no effect, try a delay
liftIO $ threadDelay 1000000
wfNavigate u "page"
-- account detail page --> download page
accountDownload :: WD ()
accountDownload = do
-- wdlog "waiting for download link.."
-- XXX sometimes times out waiting for Download Account Activity, doesn't leave account summary page
-- l <- waitUntil waitTime $ findElem (ByLinkText "Download Account Activity")
-- `onTimeout` do
-- writeScreenshot "screen.png"
-- failedCommand Timeout "timed out waiting for download link, check screenshot"
l <- waitUntil waitTime $ do
-- writeScreenshot "screen.png"
findElem (ByLinkText "Download Account Activity")
Just u <- l `attr` "data-url"
wdlog "loading account download page.."
wfNavigate u "saml"
-- download page --> download CSV file via ajax & return content
downloadCsv :: WD String
downloadCsv = do
wdlog "waiting for CSV option.."
waitUntil waitTime $ do
e <- findElem (ByCSS "input[id=commaDelimited]")
wdlog "clicking CSV option.."
click e
wdlog "waiting for CSV option to be checked.."
waitUntil waitTime $ do
es <- findElems (ByCSS "input[id=commaDelimited]:checked")
-- wdlog $ "checked: " ++ show es
expect $ not $ null es
-- credit card accounts don't set a default date range and allow up to 120 days, set that for all
let
days = 120
fmt = T.pack . formatTime defaultTimeLocale "%m/%d/%y"
wdlog $ "setting date range to last "++show days++" days.."
todate <- liftIO getCurrentDay
let fromdate = addDays (-days) todate
findElem (ByCSS "input[id=fromDate]") >>= sendKeys (fmt fromdate)
findElem (ByCSS "input[id=toDate]") >>= sendKeys (fmt todate)
wdlog "downloading CSV.."
wfSubmitDownloadForm
wfNavigate :: T.Text -> String -> WD ()
wfNavigate url displaytype = do
waitForJquery
ignoreReturn $ executeJS [JSArg url, JSArg displaytype] $ T.unlines
["$(document).trigger('mwfNavigation', {"
," url: arguments[0],"
," displayType: arguments[1],"
," targetElement: '.page'"
,"})"
]
wfSubmitDownloadForm :: WD String
wfSubmitDownloadForm = do
waitForJquery
executeJS [] $ T.unlines
["var $f = $('form[id=accountActivityDownloadModel]');"
,"var jqxhr = $.ajax({"
," async: false,"
," type: 'POST',"
," url: $f.attr('action'),"
," data: $f.serialize() + '&Download=Download',"
-- avoid WF's jsonDataFilter
," global: false,"
," dataFilter: function(data,type){ return data; },"
,"});"
,"return jqxhr.responseText;"
]
-- in browser console:
-- $.ajax({'async':false, 'type':'POST', 'url':$f.attr('action'), 'data':$f.serialize()+'&Download=Download', 'global':false })
-- download binary file: http://jsfiddle.net/3kUXy/
-- TODO handle html response containing "There is no Account Activity information available for this account."
waitForJquery :: WD ()
waitForJquery = do
-- wdlog "waiting for jQuery.."
waitUntil waitTime $ catchFailedCommand JavascriptError $ ignoreReturn (executeJS [] "jQuery")
iolog :: String -> IO ()
iolog s = hPutStrLn stderr s >> hFlush stderr
wdlog :: String -> WD ()
wdlog = liftIO . iolog
p :: Show a => a -> WD ()
p = liftIO . print
printUrl :: WD ()
printUrl = getCurrentURL >>= liftIO . putStrLn
wait :: Double -> WD ()
wait secs = liftIO $ threadDelay (round $ secs * 1000000)
writeScreenshot :: FilePath -> WD ()
writeScreenshot f = screenshot >>= liftIO . B.writeFile f
-- | Remove leading and trailing whitespace.
strip :: String -> String
strip = lstrip . rstrip
-- | Remove leading whitespace.
lstrip :: String -> String
lstrip = dropWhile (`elem` (" \t"::String)) :: String -> String -- XXX isSpace ?
-- | Remove trailing whitespace.
rstrip :: String -> String
rstrip = reverse . lstrip . reverse
-- | Remove trailing newlines/carriage returns.
chomp :: String -> String
chomp = reverse . dropWhile (`elem` ("\r\n"::String)) . reverse
getCurrentDay :: IO Day
getCurrentDay = do
t <- getZonedTime
return $ localDay (zonedTimeToLocalTime t)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment