Skip to content

Instantly share code, notes, and snippets.

@rafaelquintanilha
Created December 5, 2024 13:54
Show Gist options
  • Save rafaelquintanilha/1c12d52b64e6dc72b04d6bd641795ceb to your computer and use it in GitHub Desktop.
Save rafaelquintanilha/1c12d52b64e6dc72b04d6bd641795ceb to your computer and use it in GitHub Desktop.
Code Night #001
# Video: https://www.youtube.com/watch?v=PRkVIBqC-is
import time
import pandas as pd
from bs4 import BeautifulSoup
from playwright.sync_api import sync_playwright
from quantbrasil.models.asset import get_ticker_map
from quantbrasil.models.portfolio import add_assets, create, remove_all
def parse_table_beautifulsoup(table_html):
    """
    Parse an HTML table with BeautifulSoup.

    Column names are taken from the <th> cells of the <thead>; every <tr> of
    the <tbody> becomes one dictionary keyed by those names.
    The cell at position 3 is converted to int (dots removed) and the cell
    at position 4 to float (comma replaced by a dot); all other cells stay
    as stripped text.

    Returns a list of dictionaries, one per body row.
    """
    document = BeautifulSoup(table_html, "html.parser")

    # Header texts, in document order
    column_names = [cell.text.strip() for cell in document.thead.find_all("th")]

    # Cell position -> converter for the numeric columns
    converters = {
        3: lambda text: int(text.replace(".", "")),  # Qtde. Teórica column
        4: lambda text: float(text.replace(",", ".")),  # Part. (%) column
    }

    records = []
    for table_row in document.tbody.find_all("tr"):
        record = {}
        for position, cell in enumerate(table_row.find_all("td")):
            text = cell.text.strip()
            convert = converters.get(position)
            parsed = text if convert is None else convert(text)
            record[column_names[position]] = parsed
        records.append(record)
    return records
def parse_table_pandas(table_html):
    """
    Parse HTML table using pandas

    ``table_html`` is the markup found inside a <table> element; it is
    wrapped in <table> tags here before being handed to pandas. Numbers are
    read in Brazilian format: "." is the thousands separator and "," is the
    decimal mark.

    Returns a DataFrame in which 'Qtde. Teórica' is an int64 column and
    'Part. (%)' is a float64 column.

    Raises KeyError if one of those two columns is missing, ValueError if a
    value in them cannot be converted, and whatever ``pd.read_html`` raises
    when no table can be parsed.
    """
    # Local import so this function does not depend on a new file-level import
    from io import StringIO

    # Passing a literal HTML string to read_html is deprecated (pandas 2.1+),
    # so wrap the markup in a file-like object.
    markup = StringIO(f"<table>{table_html}</table>")

    # With the read_html defaults (thousands=",", decimal=".") a weight such
    # as "0,523" is read as 523 and a quantity such as "123.450" is read as
    # the float 123.45. Tell pandas the real separators instead of patching
    # the already-mangled values afterwards.
    df = pd.read_html(markup, thousands=".", decimal=",")[0]

    # Clean column names
    df.columns = [col.strip() for col in df.columns]

    # Enforce the numeric dtypes; this fails loudly on unparseable cells
    df["Qtde. Teórica"] = df["Qtde. Teórica"].astype("int64")
    df["Part. (%)"] = df["Part. (%)"].astype("float64")
    return df
# Example usage
def process_table(table_html, method="pandas"):
    """
    Parse ``table_html`` with the selected backend and report the outcome.

    ``method="pandas"`` uses parse_table_pandas; any other value uses
    parse_table_beautifulsoup. A success message with the row count is
    printed and the parsed data is returned. Any exception raised while
    parsing is printed and None is returned instead.
    """
    try:
        # Handle the non-default backend first, then fall through to pandas
        if method != "pandas":
            parsed = parse_table_beautifulsoup(table_html)
            print(f"Successfully parsed {len(parsed)} rows using BeautifulSoup")
            return parsed

        parsed = parse_table_pandas(table_html)
        print(f"Successfully parsed {len(parsed)} rows using pandas")
        return parsed
    except Exception as err:
        print(f"Error parsing table: {str(err)}")
        return None
# Scrape the index composition table from the B3 page, then store the
# tickers and their weights in the "IBX100" portfolio.
b3_url = "https://sistemaswebb3-listados.b3.com.br/indexPage/day/IBXX?language=pt-br"
table_selector = "table.table-responsive-sm.table-responsive-md"

with sync_playwright() as p:
    browser = p.chromium.launch()
    try:
        page = browser.new_page()
        page.goto(b3_url)
        # wait for the table to load before touching the page-size selector
        page.wait_for_selector(table_selector)
        select = page.locator("#selectPage")
        # presumably the number of rows shown per page -- TODO confirm
        select.select_option("120")
        # wait a few seconds for the table to refresh; wait_for_timeout is
        # the Playwright-recommended replacement for time.sleep in the sync API
        page.wait_for_timeout(2000)
        # wait for the table to load
        table = page.wait_for_selector(table_selector)
        table_html = table.inner_html()
    finally:
        # Close the browser even when navigation or a wait fails
        browser.close()

data = process_table(table_html, method="beautifulsoup")
if not data:
    # process_table returns None on a parse error; an empty result is just
    # as unusable. Stop before remove_all() clears the portfolio.
    raise SystemExit("No rows parsed from the B3 table; portfolio left untouched")

tickers = [row.get("Código") for row in data]
weights = [row.get("Part. (%)") for row in data]
print(tickers)
print(f"Found {len(tickers)} tickers")

portfolio_id = create("IBX100")
print(f"Created portfolio with id {portfolio_id}")
remove_all(portfolio_id)
print(f"Removed all assets from portfolio {portfolio_id}")
ticker_map = get_ticker_map(tickers)
# NOTE(review): raises KeyError if get_ticker_map has no entry for a ticker
asset_ids = [ticker_map[ticker] for ticker in tickers]
add_assets(asset_ids, portfolio_id, weights)
print(f"Added {len(asset_ids)} assets to portfolio")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment