This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Use PyPika to generate a SQL query on DuckDB over parquet files in cloud storage.""" | |
import adlfs | |
import duckdb | |
import polars as pl | |
from polars import col | |
from pyarrow import dataset as ds | |
from pypika import Query, Table | |
from pypika import analytics as an | |
# Create a filesystem representing an Azure Blob Storage account |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Example of how to query parquet datasets from cloud storage accounts with Polars.""" | |
import adlfs | |
import polars as pl | |
from polars import col | |
from pyarrow import dataset as ds | |
# Create a filesystem representing an Azure Blob Storage account.
# NOTE(review): the SAS token is an empty string -- presumably this account
# allows anonymous reads; confirm before reusing this pattern elsewhere.
fs = adlfs.AzureBlobFileSystem(
    account_name="azureopendatastorage", sas_token="", container_name="mlsamples"
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from holidays import country_holidays, list_supported_countries | |
def main(): | |
"""Get a global holidays table.""" | |
years = 2022, 2023, 2024 | |
all_holidays = { | |
ct: country_holidays(ct, years=years) for ct in list_supported_countries().keys() | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Example of how to open a debugger anywhere in the middle of a pandas method chain.""" | |
import numpy as np | |
import seaborn as sns | |
from IPython.core.debugger import Pdb | |
# Load the "iris" example dataset through seaborn; the method chain that
# follows starts from this dataframe.
df = sns.load_dataset("iris")
df = ( | |
df.assign(sepal_ratio=lambda x: np.divide(x["sepal_width"], x["sepal_length"])) | |
.pipe(lambda x: Pdb().set_trace()) # Opens the debugger, the dataframe is assigned to `x` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download the Study 1 data set as CSV from OSF into a data frame.
LeBel.Vess.Study1 <- read.csv("https://osf.io/suza9/?action=download&version=1")

# Keep only the rows whose `completeCases` column equals 1. Inside subset()
# the name `completeCases` is looked up in the data frame first, so the
# condition refers to the column, not to the variable being assigned here.
completeCases <- subset(LeBel.Vess.Study1, completeCases == 1)

# Correlation test between the two columns (cor.test uses Pearson's method
# unless `method` is given). Both columns are coerced to numeric first.
cor.test(as.numeric(completeCases$attAnx), as.numeric(completeCases$warmFoods))

# Scatter plot of the same two variables.
plot(as.numeric(completeCases$attAnx), as.numeric(completeCases$warmFoods))