Last active
August 29, 2015 14:01
-
-
Save caub/7b980ac18e524a40876c to your computer and use it in GitHub Desktop.
web scraping
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from selenium import webdriver | |
from selenium.webdriver.common.keys import Keys | |
from selenium.common.exceptions import NoSuchElementException,ElementNotVisibleException | |
import time | |
import requests | |
import json | |
import urlparse | |
# Walk the lightbox gallery of an e-adrenaline.fr article and collect the
# `src` URL of every image shown.
browser = webdriver.Firefox()

# TODO: upload the collected images to imgur.  Draft of the OAuth step:
# api_id = '918229bb0dc8940'
# browser.get('https://api.imgur.com/oauth2/authorize?client_id={}&response_type=token'.format(api_id))
# browser.find_element_by_id('allow').click()
# x=browser.current_url
# u=urlparse(x)
# token = urlparse.parse_qs(u.fragment)['access_token']
# print token
# set http header Authorization: Bearer token
# selenium can't do that
# todo run imgur api requests in js with executescript
# or set a proxy that takes token as query string and put it in header

try:
    browser.get('http://www.e-adrenaline.fr/terre/actualites/portfolio-quand-les-chevres-defient-la-gravite/4084')
    print(browser.title.encode('utf-8').strip())

    # Clicking the first slide opens the lightbox.
    browser.find_element_by_xpath("//div[@id='diapo-1']/a").click()

    imgs = []
    x = browser.find_element_by_id('lightbox-image')
    # find_element_by_id raises when nothing matches, so the loop is really
    # ended by the `break` below once the "next" button cannot be clicked.
    while x is not None:
        src = x.get_attribute('src')
        print(src)
        imgs.append(src)
        # payload = {'image': url}
        # r = requests.post('https://api.imgur.com/3/image', data=payload)
        try:
            browser.find_element_by_id('lightbox-nav-btnNext').click()
        except (NoSuchElementException, ElementNotVisibleException):
            print('stop')
            break
        time.sleep(5)  # wait before reading the lightbox image again
        x = browser.find_element_by_id('lightbox-image')

    print(imgs)
finally:
    # Always close the Firefox instance, including when a lookup raises.
    browser.quit()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from selenium import webdriver | |
from selenium.webdriver.common.keys import Keys | |
import time | |
import os | |
import csv | |
import getpass | |
import pymongo | |
import logging | |
# Login credentials and endpoint for the scraped site.  The credentials can
# be supplied through the SCRAPER_USER / SCRAPER_PASSWORD environment
# variables so real secrets need not live in the source; the original
# literals remain the defaults.
user = os.environ.get('SCRAPER_USER', '...')
password = os.environ.get('SCRAPER_PASSWORD', '..')
loginuri = "https://..../auth_login"
def historyuri(id):
    """Return the history-export URL for the account ``id``."""
    template = "http://..../{}/export/history"
    return template.format(id)
def tradinguri(id):
    """Return the trading-export URL for the account ``id``."""
    template = "http://.../{}/export/trading"
    return template.format(id)
def histcsv(id):
    """Return the path of the history CSV for ``id`` in the downloads folder.

    The folder is derived from the current login name (``getpass.getuser()``).
    """
    login_name = getpass.getuser()
    return 'C:/users/{}/downloads/{}.history.csv'.format(login_name, id)
def tradingcsv(id):
    """Return the path of the trading CSV for ``id`` in the downloads folder.

    The folder is derived from the current login name (``getpass.getuser()``).
    """
    login_name = getpass.getuser()
    return 'C:/users/{}/downloads/{}.trading.csv'.format(login_name, id)
# MongoDB connection; every collection used below is reached through the
# "jfx" database handle.
client = pymongo.MongoClient("mongodb://ddfgdfg@fdgdfg:fdfd/gd")
db = client["jfx"]
def updatehistory(id, csvreader):
    """Save every row of a history CSV export into ``db.tests.mql5history``.

    Args:
        id: Account identifier; must be a string because it is joined into
            the document ``_id``.
        csvreader: Iterator over rows (lists of strings), e.g. ``csv.reader``.
            The first row is treated as the header and discarded.

    Blank rows are skipped and an empty export is a no-op.
    """
    def to_number(cell):
        # Spaces are removed before parsing; an empty cell counts as 0.
        return float(cell.replace(' ', '')) if cell != '' else 0

    # Drop the header; the default keeps an empty file from raising
    # StopIteration.
    next(csvreader, None)
    for row in csvreader:
        if not row:  # csv.reader yields [] for blank lines
            continue
        com = to_number(row[9])
        swap = to_number(row[10])
        pnl = to_number(row[11])
        o = {
            '_id': '_'.join((id, row[3], row[1], row[2], row[4],
                             row[8], row[0], row[7])),
            'state': '1',
            'account_id': id,
            'strategy': '',
            'symbol': row[3],
            'type': row[1],
            'amount': row[2],
            'ot': row[0],
            'op': row[4],
            'sl': row[5],
            'tp': row[6],
            'ct': row[7],
            'cp': row[8],
            'commission': row[9],
            'interest': row[10],
            # Stored pnl is the raw pnl plus commission plus swap.
            'pnl': str(pnl + com + swap),
            'pips': '0',
        }
        # NOTE(review): Collection.save is deprecated in PyMongo 3 — confirm
        # the installed version before upgrading.
        db.tests.mql5history.save(o)
def updatetrading(id, csvreader):
    """Save every row of a trading CSV export into ``db.tests.mql5trading``.

    Args:
        id: Account identifier; must be a string because it is joined into
            the document ``_id``.
        csvreader: Iterable of rows (lists of strings), e.g. ``csv.reader``.
            Rows whose first cell is ``'Time'`` (the header) are skipped.

    Blank rows are skipped as well.
    """
    def to_number(cell):
        # Spaces are removed before parsing; an empty cell counts as 0.
        return float(cell.replace(' ', '')) if cell != '' else 0

    for row in csvreader:
        if not row:  # csv.reader yields [] for blank lines
            continue
        if row[0] == 'Time':
            continue
        print(row)
        com = to_number(row[8])
        swap = to_number(row[9])
        pnl = to_number(row[10])
        o = {
            # The empty element reproduces the original double underscore.
            # NOTE(review): row[7] is stored as 'cp' below, so this _id
            # changes whenever that value changes — confirm this is intended.
            '_id': '_'.join((id, row[3], row[1], row[2], row[4],
                             '', row[0], row[7])),
            'state': '1',
            'account_id': id,
            'strategy': '',
            'symbol': row[3],
            'type': row[1],
            'amount': row[2],
            'ot': row[0],
            'op': row[4],
            'sl': row[5],
            'tp': row[6],
            'cp': row[7],
            'commission': row[8],
            'interest': row[9],
            # Stored pnl is the raw pnl plus commission plus swap.
            'pnl': str(pnl + com + swap),
            'pips': '0',
        }
        # NOTE(review): Collection.save is deprecated in PyMongo 3 — confirm
        # the installed version before upgrading.
        db.tests.mql5trading.save(o)
# db.tests.mql5users.save({'users':['317','111']})

# Web scraping: log in once, then re-download and import the CSV exports of
# every account in an endless loop.

# Without a handler the root logger drops the info messages emitted below.
logging.basicConfig()
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

browser = webdriver.Chrome()
try:
    browser.get(loginuri)
    print(browser.title)
    browser.find_element_by_name('Login').send_keys(user)
    browser.find_element_by_name('Password').send_keys(password)  # + Keys.RETURN
    browser.find_element_by_css_selector('.buttonActive').click()

    accountIds = db.tests.mql5users.find_one()['users']
    logger.info(' go ')

    ## refresh DB each minute
    while True:
        for account_id in accountIds:
            # Remove earlier downloads, else the browser saves "(2).csv",
            # "(3).csv", ...  Each file is removed independently so a missing
            # history file does not leave a stale trading file behind.
            for stale_path in (histcsv(account_id), tradingcsv(account_id)):
                try:
                    os.remove(stale_path)
                except OSError:
                    pass
            browser.get(historyuri(account_id))
            browser.get(tradinguri(account_id))

        time.sleep(5)  # 5 seconds to make sure .csv are downloaded

        for account_id in accountIds:
            try:
                with open(histcsv(account_id), 'rb') as csvfile:
                    updatehistory(account_id, csv.reader(csvfile, delimiter=';'))
                    logger.info(' updated history ' + account_id)
                with open(tradingcsv(account_id), 'rb') as csvfile:
                    updatetrading(account_id, csv.reader(csvfile, delimiter=';'))
                    logger.info(' updated trading ' + account_id)
            except IOError:
                logger.info('file not found ' + account_id)

        time.sleep(55)
        print(' ---------------- ')
finally:
    # The loop only ends through an exception (e.g. Ctrl-C); close Chrome
    # on the way out.
    browser.quit()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from selenium import webdriver | |
from selenium.webdriver.common.keys import Keys | |
import time | |
import os | |
import csv | |
import getpass | |
import json | |
from sqlalchemy.ext.declarative import declarative_base | |
from sqlalchemy import Column, ForeignKey, Integer, String, Text, desc | |
from sqlalchemy import create_engine | |
from sqlalchemy.orm import sessionmaker | |
import logging | |
# Login credentials and endpoint for the scraped site.  The credentials can
# be supplied through the SCRAPER_USER / SCRAPER_PASSWORD environment
# variables so real secrets need not live in the source; the original
# literals remain the defaults.
user = os.environ.get('SCRAPER_USER', '...')
password = os.environ.get('SCRAPER_PASSWORD', '..')
loginuri = "https://.../auth_login"
def historyuri(id):
    """Return the history-export URL for the account ``id``."""
    template = "http://.../{}/export/history"
    return template.format(id)
def tradinguri(id):
    """Return the trading-export URL for the account ``id``."""
    template = "http://..../{}/export/trading"
    return template.format(id)
def histcsv(id):
    """Return the path of the history CSV for ``id`` in the downloads folder.

    The folder is derived from the current login name (``getpass.getuser()``).
    """
    login_name = getpass.getuser()
    return 'C:/users/{}/downloads/{}.history.csv'.format(login_name, id)
def tradingcsv(id):
    """Return the path of the trading CSV for ``id`` in the downloads folder.

    The folder is derived from the current login name (``getpass.getuser()``).
    """
    login_name = getpass.getuser()
    return 'C:/users/{}/downloads/{}.trading.csv'.format(login_name, id)
# Logging: install the default handler and keep SQLAlchemy's engine logger
# at ERROR.
logging.basicConfig()
logging.getLogger('sqlalchemy.engine').setLevel(logging.ERROR)

# Declarative base shared by the mapped classes defined below.
Base = declarative_base()

# In-memory SQLite engine; MySQL alternatives kept for reference:
# engine = create_engine("mysql://hjkhkhjk:3306/pe")
# engine = create_engine("mysql://{}:{}@{}/{}".format("gdfg","dfgg","gdfgd","dsf"))
engine = create_engine('sqlite:///:memory:')
Base.metadata.bind = engine

# Session factory plus the single session used by the rest of the script.
DBSession = sessionmaker(bind=engine)
session = DBSession()
class User(Base):
    """Row of the ``users`` table: one tracked account, keyed by ``mql5_id``."""

    __tablename__ = 'users'

    mql5_id = Column(String(255), primary_key=True)
    # Text columns; the main loop stores a JSON dump of the open orders in
    # open_position.
    open_position = Column(Text())
    closed_position = Column(Text())

    def __repr__(self):
        # The column is `open_position`; the former `open_positions` raised
        # AttributeError whenever a User was printed.
        return 'User: {}, {}'.format(self.mql5_id, self.open_position)
class Order(Base):
    """Row of the ``orders`` table; every field is kept as a string."""

    __tablename__ = 'orders'

    # Owning account (references users.mql5_id).
    account_id = Column(String(255), ForeignKey('users.mql5_id'))

    # Order description.
    state = Column(String(12))
    strategy = Column(String(255))
    symbol = Column(String(255))
    type = Column(String(255))
    amount = Column(String(255))

    # Values copied from the CSV export columns.
    ot = Column(String(255))
    op = Column(String(255))
    sl = Column(String(255))
    tp = Column(String(255))
    ct = Column(String(255))
    cp = Column(String(255))

    # Money figures.
    commission = Column(String(255))
    interest = Column(String(255))
    pnl = Column(String(255))
    pips = Column(String(255))

    # Primary key, built by the caller from the order's fields.
    id = Column(String(255), primary_key=True)

    def __repr__(self):
        return 'Order: {}'.format(self.id)
# Create the tables of every mapped class on the engine.
# NOTE(review): create_all checks for existing tables by default, so it is
# probably safe to leave enabled on later runs — confirm for the SQLAlchemy
# version in use.
Base.metadata.create_all(bind=engine)
def gethistory(id, csvreader, last_ct):
    """Build ``Order`` objects for history rows closed after ``last_ct``.

    Args:
        id: Account identifier; must be a string because it is joined into
            the order id.
        csvreader: Iterable of rows (lists of strings), e.g. ``csv.reader``.
            Rows whose first cell is ``'Time'`` (the header) and blank rows
            are skipped.
        last_ct: Close-time string; only rows whose ``row[7]`` compares
            greater (plain string comparison) are returned.

    Returns:
        ``(orders, max_ct)`` where ``max_ct`` is the greatest ``row[7]`` seen
        among the returned rows, or ``last_ct`` when there are none.
    """
    def to_number(cell):
        # Spaces are removed before parsing; an empty cell counts as 0.
        return float(cell.replace(' ', '')) if cell != '' else 0

    orders = []
    max_ct = last_ct
    # todo sort by ct and update only 15
    for row in csvreader:
        if not row or row[0] == 'Time':  # blank line or header
            continue
        # NOTE(review): the source lost its indentation; this assumes the
        # order is only built for rows newer than last_ct — confirm.
        if not row[7] > last_ct:
            continue
        max_ct = max(max_ct, row[7])
        com = to_number(row[9])
        swap = to_number(row[10])
        pnl = to_number(row[11])
        orders.append(Order(
            state='1',
            account_id=id,
            strategy='',
            symbol=row[3],
            type=row[1],
            amount=row[2],
            ot=row[0],
            op=row[4],
            sl=row[5],
            tp=row[6],
            ct=row[7],
            cp=row[8],
            commission=row[9],
            interest=row[10],
            # Stored pnl is the raw pnl plus commission plus swap.
            pnl=str(pnl + com + swap),
            pips='0',
            id='_'.join((id, row[3], row[1], row[2], row[4],
                         row[8], row[0], row[7])),
        ))
    return (orders, max_ct)
def gettrading(id, csvreader):
    """Convert the rows of a trading CSV export into a list of order dicts.

    Args:
        id: Account identifier; must be a string because it is joined into
            the order id.
        csvreader: Iterable of rows (lists of strings), e.g. ``csv.reader``.
            Rows whose first cell is ``'Time'`` (the header) and blank rows
            are skipped.

    Returns:
        A list with one dict per remaining row; all values are strings.
    """
    def to_number(cell):
        # Spaces are removed before parsing; an empty cell counts as 0.
        return float(cell.replace(' ', '')) if cell != '' else 0

    orders = []
    for row in csvreader:
        if not row or row[0] == 'Time':  # blank line or header
            continue
        com = to_number(row[8])
        swap = to_number(row[9])
        pnl = to_number(row[10])
        orders.append({
            'state': '0',
            'accountId': id,
            'strategy': '',
            'symbol': row[3],
            'type': row[1],
            'amount': row[2],
            'ot': row[0],
            'op': row[4],
            'sl': row[5],
            'tp': row[6],
            'ct': '',
            'cp': row[7],
            'commission': row[8],
            'interest': row[9],
            # Stored pnl is the raw pnl plus commission plus swap.
            'pnl': str(pnl + com + swap),
            'pips': '0',
            'id': '_'.join((id, row[3], row[1], row[2], row[4])) + '_;_' + row[0],
        })
    return orders
# Seed the two default accounts when the users table is empty.
users = session.query(User)
if users.count() == 0:
    session.add_all([User(mql5_id='317'), User(mql5_id='111')])
    session.commit()
    users = session.query(User)

logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

browser = webdriver.Chrome()
try:
    browser.get(loginuri)
    print(browser.title)
    browser.find_element_by_name('Login').send_keys(user)
    browser.find_element_by_name('Password').send_keys(password)  # + Keys.RETURN
    browser.find_element_by_css_selector('.buttonActive').click()

    logger.info(' go ')

    ## refresh DB each minute
    # Latest close time already imported, tracked per account so that one
    # account's history cannot filter out another account's rows.
    last_ct_by_account = {}
    while True:
        for account in users:
            # Remove earlier downloads, else the browser saves "(2).csv",
            # "(3).csv", ...  Each file is removed independently so a missing
            # history file does not leave a stale trading file behind.
            for stale_path in (histcsv(account.mql5_id),
                               tradingcsv(account.mql5_id)):
                try:
                    os.remove(stale_path)
                except OSError:
                    pass
            browser.get(historyuri(account.mql5_id))
            browser.get(tradinguri(account.mql5_id))

        time.sleep(15)  # 15 seconds to make sure .csv are downloaded

        for account in users:
            try:
                with open(tradingcsv(account.mql5_id), 'rb') as csvfile:
                    orders = gettrading(account.mql5_id,
                                        csv.reader(csvfile, delimiter=';'))
                    print(orders)
                    print('...')
                    account.open_position = json.dumps(orders)
                    logger.info(' updated trading {} {}'.format(account.mql5_id, len(orders)))
                with open(histcsv(account.mql5_id), 'rb') as csvfile:
                    last_ct = last_ct_by_account.get(account.mql5_id, '')
                    (orders, max_ct) = gethistory(account.mql5_id,
                                                  csv.reader(csvfile, delimiter=';'),
                                                  last_ct)
                    print(' ... {} {}'.format(last_ct, max_ct))
                    last_ct_by_account[account.mql5_id] = max_ct
                    # user.closed_positions = orders
                    # no cascading for now
                    session.add_all(orders)
                    logger.info(' updated history {} {}'.format(account.mql5_id, len(orders)))
                session.commit()
            except IOError:
                logger.info('file not found ' + account.mql5_id)

        time.sleep(10)
        print('test')
        for o in session.query(User).filter(User.mql5_id == '317'):
            print(o)
        # for o in session.query(Order).filter(Order.state=='1').order_by(desc(Order.ot)).limit(5):
        #     print o
        time.sleep(35)
        print(' ---------------- ')
finally:
    # The loop only ends through an exception (e.g. Ctrl-C); close Chrome
    # on the way out.
    browser.quit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment