
# product title
title = soup.find(id='productTitle').get_text().strip()

# to prevent the script from crashing when there isn't a price for the product
try:
    # drop the thousands separator and euro sign, then turn the decimal comma into a dot
    price = float(soup.find(id='priceblock_ourprice').get_text().replace('.', '').replace('€', '').replace(',', '.').strip())
except (AttributeError, ValueError):
    price = ''

# review score (the extraction code is cut off in this preview)
tracker_log = pd.DataFrame()  # later on, this will be your previous search history file
now = datetime.now().strftime('%Y-%m-%d %Hh%Mm')
log = pd.DataFrame({'date': now.replace('h', ':').replace('m', ''),
                    'code': prod_tracker.code[x],  # this code comes from the TRACKER_PRODUCTS file
                    'url': url,
                    'title': title,
                    'buy_below': prod_tracker.buy_below[x],  # this price comes from the TRACKER_PRODUCTS file
                    'price': price,
                    'stock': stock,  # availability flag, set earlier in the full script
                    }, index=[x])  # closing restored; the remaining columns are cut off in this preview
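The preview stops mid-dictionary; in the full script each log row would presumably be appended to tracker_log and written back to disk. A minimal sketch of that persistence step (the TRACKER_LOG.xlsx filename is illustrative, not taken from the gist):

# append this search to the running history and persist it
# (the filename is an assumption, not from the gist)
tracker_log = pd.concat([tracker_log, log])
tracker_log.to_excel('TRACKER_LOG.xlsx', index=False)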
import requests
from glob import glob
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
from time import sleep

# http://www.networkinghowtos.com/howto/common-user-agent-list/
HEADERS = ({'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'})
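These headers make each request look like it comes from an ordinary browser. A minimal sketch of how they would be passed when fetching a product page (the url value is illustrative, not from the gist):

url = 'https://www.amazon.es/dp/B07EXAMPLE'  # illustrative product URL, not from the gist
page = requests.get(url, headers=HEADERS)
soup = BeautifulSoup(page.content, features='lxml')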
fnneves / bs4.py · Created September 27, 2020 17:17
import bs4

html = """<html>
<div class="some_class">
some random text to be scraped
</div>
</html>"""

# parse the raw HTML string and pull the text out of the target div
parsed_html = bs4.BeautifulSoup(html, "html.parser")
text_scraped = parsed_html.find("div", attrs={"class": "some_class"}).text
fnneves / bs4_example.py · Last active September 28, 2020 18:50 · webscraping tools comparison
from bs4 import BeautifulSoup
import requests
url = "https://en.wikipedia.org/wiki/Academy_Award_for_Best_Director"
page_code = requests.get(url)
soup = BeautifulSoup(page_code.content, features="lxml")
# printing the page title
print(soup.find("h1").text)
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

# Tell Selenium where you installed the chromedriver!
chromedriver_path = 'C:/Users/User/Downloads/chromedriver_win32/chromedriver.exe'
driver = webdriver.Chrome(executable_path=chromedriver_path)  # renamed to avoid shadowing the webdriver module

# Getting the website through the webdriver
driver.get('https://www.somewebsite.com/accounts/login/?source=auth_switcher')
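To finish the comparison with the requests/BeautifulSoup version above, the Selenium route would read the same kind of element through the driver. A short sketch using the Selenium 3 API that matches the executable_path style of this gist:

# the Selenium counterpart of soup.find("h1").text
print(driver.find_element_by_tag_name('h1').text)
driver.quit()  # close the browser when finished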
import pandas as pd
from glob import glob
from time import strftime, sleep
import numpy as np
from datetime import datetime
from pandas_datareader import data as pdr
from pandas.tseries.offsets import BDay
import yfinance as yf

yf.pdr_override()  # route pandas_datareader's Yahoo calls through yfinance
ly = datetime.today().year - 1  # last year
today = datetime.today()
start_sp = datetime(2019, 1, 1)
end_sp = today
start_stocks = datetime(2019, 1, 1)
end_stocks = today
start_ytd = datetime(ly, 12, 31) + BDay(1)  # first business day of the current year

def get(tickers, startdate, enddate):
    # standard pandas-datareader pattern: fetch each ticker and stack the
    # frames under a (ticker, date) MultiIndex; the preview kept only the
    # two def lines, so this body is a restored sketch of that pattern
    def data(ticker):
        return pdr.get_data_yahoo(ticker, start=startdate, end=enddate)
    datas = map(data, tickers)
    return pd.concat(datas, keys=tickers, names=['ticker', 'date'])
MEGA_DICT = {}  # you have to create it first
min_date = '2020-01-01'  # optional
TX_COLUMNS = ['date', 'ticker', 'cashflow', 'cml_units', 'cml_cost', 'gain_loss']
tx_filt = all_transactions[TX_COLUMNS]  # keeping just the most relevant ones for now

for ticker in filt_tickers:
    prices_df = all_data[all_data.index.get_level_values('ticker').isin([ticker])].reset_index()
    ## Can add more columns like volume!
    PX_COLS = ['date', 'adj_close']
    prices_df = prices_df[prices_df.date >= min_date][PX_COLS].set_index(['date'])
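The preview cuts the loop off before MEGA_DICT is filled, yet MEGA_DF appears fully built in the next block; presumably each ticker's frame gets prefixed column names and is stored, then everything is concatenated column-wise. A hedged sketch of that glue step (the add_prefix naming and the mktvalue column are assumptions inferred from the filter(regex='mktvalue') call below):

# inside the loop above, something along these lines (assumed, not from the gist):
#     MEGA_DICT[ticker] = prices_df.add_prefix(f'{ticker}_')
# after the loop, stitch the per-ticker frames into one wide frame:
MEGA_DF = pd.concat(MEGA_DICT.values(), axis=1)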
portf_allvalues = MEGA_DF.filter(regex='mktvalue').fillna(0)  # getting just the market value of each ticker
portf_allvalues['portf_value'] = portf_allvalues.sum(axis=1)  # summing all market values

# For the S&P 500 price return
sp500 = pdr.get_data_yahoo('^GSPC', start_stocks, end_sp)
clean_header(sp500)  # helper defined elsewhere in the script; normalizes column names like 'Adj Close' -> 'adj_close'

# getting the pct change
portf_allvalues = portf_allvalues.join(sp500['adj_close'], how='inner')
portf_allvalues.rename(columns={'adj_close': 'sp500_mktvalue'}, inplace=True)
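The "getting the pct change" comment suggests the next step compares relative performance, but the preview ends before it. A minimal sketch of how the portfolio and the S&P 500 could be put on the same footing (the column names follow the frame built above; the exact formula is an assumption, not from the gist):

# assumed continuation: cumulative % change of portfolio vs S&P 500
portf_allvalues['ptf_value_pctch'] = portf_allvalues['portf_value'].pct_change().add(1).cumprod().sub(1) * 100
portf_allvalues['sp500_pctch'] = portf_allvalues['sp500_mktvalue'].pct_change().add(1).cumprod().sub(1) * 100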