Last active
December 29, 2015 14:09
-
-
Save maggiben/7682330 to your computer and use it in GitHub Desktop.
NASDAQ100 Yahoo Scraper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Convert the semicolon-separated quote export ./Bovespa.csv into a JSON
// array of records written to ./Bovespa.json.
var fs = require("fs");

/**
 * Normalize a number written with "." as thousands separator and "," as
 * decimal mark (e.g. "1.234,56") into a plain decimal string ("1234.56").
 *
 * @param {string} value - Raw field text.
 * @returns {string} The same number with separators normalized.
 */
function toDecimalString(value) {
    return value.replace(/\./g, "").replace(/,/g, ".");
}

/**
 * Parse one line of the CSV into a record.
 *
 * Expected columns: date (dd/mm/yyyy); last; open; diff (percentage);
 * max; min; volume.
 *
 * @param {string} line - One raw line of the input file.
 * @returns {Object|null} The parsed record, or null when the line does not
 *     have seven fields or its first field is not a three-part date.
 */
function parseLine(line) {
    // Strip a trailing CR so CRLF files do not leak "\r" into the last field.
    var fields = line.replace(/\r$/, "").split(";");
    if (fields.length < 7) {
        return null;
    }
    var d = fields[0].split("/");
    if (d.length !== 3) {
        return null;
    }
    return {
        // Re-order day/month/year into year/month/day.
        date: d[2] + "/" + d[1] + "/" + d[0],
        last: toDecimalString(fields[1]),
        open: toDecimalString(fields[2]),
        diff: fields[3].replace(/%/g, "").replace(/,/g, "."),
        max: toDecimalString(fields[4]),
        min: toDecimalString(fields[5]),
        volume: toDecimalString(fields[6])
    };
}

var obj = [];
fs.readFileSync('./Bovespa.csv').toString().split('\n').forEach(function (line, lineNumber) {
    var record = parseLine(line);
    if (record === null) {
        // Report lines that cannot be parsed instead of storing a record
        // full of undefined values; blank lines are skipped silently.
        if (line.trim() !== "") {
            console.log("ln: ", lineNumber, "arr: ", line.split(";"));
        }
        return;
    }
    obj.push(record);
});

// write JSON: serialize the whole array once so the output file is a single
// valid JSON document.
var data = JSON.stringify(obj);
fs.writeFile('./Bovespa.json', data, function (err) {
    if (err) {
        console.log('There has been an error saving your configuration data.');
        console.log(err.message);
        return;
    }
    console.log('Configuration saved successfully.');
});
// Print the third record as a quick sanity check of the parsed data.
console.log(JSON.stringify(obj[2]));
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import every CSV file in ./stocks into MongoDB, one collection per file.
# The collection name is the file name without its extension.
for path in ./stocks/*
do
  # Skip the unexpanded pattern when the directory is empty, and anything
  # that is not a regular file.
  [ -f "${path}" ] || continue
  file=${path##*/}
  collection=${file%.*}
  mongoimport --host linus.mongohq.com --port 10050 --username admin --password admin --db nasdaq100 --collection "${collection}" --type csv --file "./stocks/${file}" --headerline --upsert
done
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Replace the string Date field of every document in the aapl collection
// with a Date object built from that string ("-" separators become "/").
db.aapl.find().forEach(function (doc) {
    // Only string values can be converted; skipping anything else keeps the
    // script safe to re-run after some documents were already converted.
    if (typeof doc.Date !== "string") {
        return;
    }
    doc.Date = new Date(doc.Date.replace(/-/g, "/"));
    db.aapl.save(doc);
});
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
AAPL,ADBE,ADI,ADP,ADSK,AKAM,ALTR,ALXN,AMAT,AMGN,AMZN,ATVI,AVGO,BBBY,BIDU,BIIB,BRCM,CA,CELG,CERN,CHKP,CHRW,CHTR,CMCSA,COST,CSCO,CTRX,CTSH,CTXS,DISCA,DLTR,DTV,EBAY,EQIX,ESRX,EXPD,EXPE,FAST,FB,FFIV,FISV,FOSL,FOXA,GILD,GMCR,GOOG,GRMN,HSIC,INTC,INTU,ISRG,KLAC,KRFT,LBTYA,LINTA,LLTC,LMCA,MAR,MAT,MCHP,MDLZ,MNST,MSFT,MU,MXIM,MYL,NFLX,NTAP,NUAN,NVDA,ORLY,PAYX,PCAR,PCLN,QCOM,REGN,ROST,SBAC,SBUX,SHLD,SIAL,SIRI,SNDK,SPLS,SRCL,STX,SYMC,TSLA,TXN,VIAB,VIP,VOD,VRSK,VRTX,WDC,WFM,WYNN,XLNX,XRAY,YHOO |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copy every file from ./stocks.org to ./stocks, keeping the header line as-is
# and wrapping the first column of every other line in ISODate(...).
mkdir -p ./stocks
for path in ./stocks.org/*
do
  # Skip the unexpanded pattern when the directory is empty, and anything
  # that is not a regular file.
  [ -f "$path" ] || continue
  file=${path##*/}
  awk -F"," '{
    if(NR <= 1) {
      print $0
    } else {
      print "ISODate("$1"),"$2","$3","$4","$5","$6","$7
    }
  }' "$path" > "./stocks/$file"
done
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Normalize the scraped CSV files (dates yyyy-mm-dd -> yyyy/mm/dd), import
# each one into its own MongoDB collection and convert the Date field of the
# imported documents to a real Date.

# variables
SCRAPSDIR=./stocks
MONGOHOST="localhost"
MONGOPORT="27017"
MONGOUSER="admin"
MONGOPASS="admin"
MONGOBASE="nasdaq100"

# Help & Usage
show_help() {
  echo "usage: --host <host> --port <port> --db <database>"
  exit 0
}

# Parse the options advertised by show_help; each one overrides a default.
while [ $# -gt 0 ]
do
  case "$1" in
    --host) [ $# -ge 2 ] || show_help; MONGOHOST=$2; shift 2 ;;
    --port) [ $# -ge 2 ] || show_help; MONGOPORT=$2; shift 2 ;;
    --db)   [ $# -ge 2 ] || show_help; MONGOBASE=$2; shift 2 ;;
    -h|--help) show_help ;;
    *) echo "ignoring unknown argument: $1" >&2; shift ;;
  esac
done

# Remove normalized scraps (create the directory first so a fresh checkout
# does not fail on the rm or on the redirects below)
mkdir -p "$SCRAPSDIR.norm"
rm -f "$SCRAPSDIR.norm"/*

for path in "$SCRAPSDIR"/*
do
  # Skip the unexpanded pattern when the directory is empty.
  [ -f "$path" ] || continue
  file=${path##*/}
  awk -F',' '{
    if(NR <= 1) {
      print $0
    } else {
      gsub(/-/, "/", $1)
      printf("%s,%s,%s,%s,%s,%s,%s\n", $1, $2, $3, $4, $5, $6, $7)
    }
  }' "$path" > "$SCRAPSDIR.norm/$file"
done

# Point scraps to normalized files
SCRAPSDIR=./stocks.norm

# Import them to mongo
for path in "$SCRAPSDIR"/*
do
  [ -f "$path" ] || continue
  file=${path##*/}
  # make collection name from filename: drop the extension, lower-case it
  collection=$(echo "${file%.*}" | awk '{print tolower($0)}')
  # import to mongodb
  mongoimport --host "$MONGOHOST" --port "$MONGOPORT" --username "$MONGOUSER" --password "$MONGOPASS" --db "$MONGOBASE" --collection "${collection}" --type csv --file "$path" --headerline --drop
  # fix data types: store Date as a Date object and save the document back,
  # otherwise the conversion is lost as soon as the shell exits
  mongo "$MONGOHOST:$MONGOPORT/$MONGOBASE" -u "$MONGOUSER" -p "$MONGOPASS" --eval "db.getCollection('${collection}').find().forEach(function(doc){
      doc.Date = new Date(doc.Date);
      db.getCollection('${collection}').save(doc);
      printjson(doc.Date);
  })"
done
#EOF
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download the daily quote history of every ticker listed in the file
# nasdaq100 into ./stocks/<ticker>.csv.
mkdir -p ./stocks
# Turn commas (and stray CRs) into spaces so the list may be comma-separated
# on one line as well as whitespace- or newline-separated.
for tkr in $(tr ',\r' '  ' < nasdaq100)
do
  echo "${tkr}"
  # --fail makes curl return an error on HTTP failures instead of saving the
  # server's error page as if it were CSV data.
  curl --fail -o "./stocks/${tkr}.csv" "http://ichart.finance.yahoo.com/table.csv?s=${tkr}&a=00&b=1&c=1980&d=10&e=27&f=2013&g=d&ignore=.csv" || echo "download failed: ${tkr}" >&2
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment