Created
October 30, 2023 21:28
-
-
Save dataders/5f532ad24348e577f40ed0c9017fd47f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "'2.1.1'" | |
| ] | |
| }, | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "import pandas as pd\n", | |
| "pd.__version__" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 28, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "ID int64[pyarrow]\n", | |
| "FIRST_NAME string[pyarrow]\n", | |
| "LAST_NAME string[pyarrow]\n", | |
| "dtype: object\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>ID</th>\n", | |
| " <th>FIRST_NAME</th>\n", | |
| " <th>LAST_NAME</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>1</td>\n", | |
| " <td>Michael</td>\n", | |
| " <td>P.</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>2</td>\n", | |
| " <td>Shawn</td>\n", | |
| " <td>M.</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>3</td>\n", | |
| " <td>Kathleen</td>\n", | |
| " <td>P.</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>4</td>\n", | |
| " <td>Jimmy</td>\n", | |
| " <td>C.</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>5</td>\n", | |
| " <td>Katherine</td>\n", | |
| " <td>R.</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " ID FIRST_NAME LAST_NAME\n", | |
| "0 1 Michael P.\n", | |
| "1 2 Shawn M.\n", | |
| "2 3 Kathleen P.\n", | |
| "3 4 Jimmy C.\n", | |
| "4 5 Katherine R." | |
| ] | |
| }, | |
| "execution_count": 28, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df_custo = pd.read_csv('~/Downloads/jaffle_shop_customers.csv',\n", | |
| " dtype_backend='pyarrow'\n", | |
| " )\n", | |
| "print(df_custo.dtypes)\n", | |
| "df_custo.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 29, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "df_custo.to_parquet('jaffle_shop_customers.parquet',\n", | |
| " index=False)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## payments" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 30, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "ID int64[pyarrow]\n", | |
| "ORDERID int64[pyarrow]\n", | |
| "PAYMENTMETHOD string[pyarrow]\n", | |
| "STATUS string[pyarrow]\n", | |
| "AMOUNT int64[pyarrow]\n", | |
| "CREATED datetime64[ns]\n", | |
| "dtype: object\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>ID</th>\n", | |
| " <th>ORDERID</th>\n", | |
| " <th>PAYMENTMETHOD</th>\n", | |
| " <th>STATUS</th>\n", | |
| " <th>AMOUNT</th>\n", | |
| " <th>CREATED</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>1</td>\n", | |
| " <td>1</td>\n", | |
| " <td>credit_card</td>\n", | |
| " <td>success</td>\n", | |
| " <td>1000</td>\n", | |
| " <td>2018-01-01</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>2</td>\n", | |
| " <td>2</td>\n", | |
| " <td>credit_card</td>\n", | |
| " <td>success</td>\n", | |
| " <td>2000</td>\n", | |
| " <td>2018-01-02</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>3</td>\n", | |
| " <td>3</td>\n", | |
| " <td>coupon</td>\n", | |
| " <td>success</td>\n", | |
| " <td>100</td>\n", | |
| " <td>2018-01-04</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>4</td>\n", | |
| " <td>4</td>\n", | |
| " <td>coupon</td>\n", | |
| " <td>success</td>\n", | |
| " <td>2500</td>\n", | |
| " <td>2018-01-05</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>5</td>\n", | |
| " <td>5</td>\n", | |
| " <td>bank_transfer</td>\n", | |
| " <td>fail</td>\n", | |
| " <td>1700</td>\n", | |
| " <td>2018-01-05</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " ID ORDERID PAYMENTMETHOD STATUS AMOUNT CREATED\n", | |
| "0 1 1 credit_card success 1000 2018-01-01\n", | |
| "1 2 2 credit_card success 2000 2018-01-02\n", | |
| "2 3 3 coupon success 100 2018-01-04\n", | |
| "3 4 4 coupon success 2500 2018-01-05\n", | |
| "4 5 5 bank_transfer fail 1700 2018-01-05" | |
| ] | |
| }, | |
| "execution_count": 30, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df_pay = pd.read_csv('~/Downloads/stripe_payments.csv',\n", | |
| " parse_dates=['CREATED'],\n", | |
| " dtype_backend='pyarrow'\n", | |
| " )\n", | |
| "print(df_pay.dtypes)\n", | |
| "df_pay.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 39, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "df_pay.to_parquet('stripe_payments.parquet',\n", | |
| " engine='pyarrow',\n", | |
| " use_deprecated_int96_timestamps=True,\n", | |
| " # allow_truncated_timestamps=True,\n", | |
| " index=False\n", | |
| ")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## orders" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 40, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "ID int64[pyarrow]\n", | |
| "USER_ID int64[pyarrow]\n", | |
| "ORDER_DATE datetime64[ns]\n", | |
| "STATUS string[pyarrow]\n", | |
| "dtype: object" | |
| ] | |
| }, | |
| "execution_count": 40, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df_orders = pd.read_csv('~/Downloads/jaffle_shop_orders.csv',\n", | |
| " parse_dates=['ORDER_DATE'],\n", | |
| " dtype_backend='pyarrow'\n", | |
| " )\n", | |
| "\n", | |
| "df_orders.dtypes" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 41, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "df_orders.to_parquet('jaffle_shop_orders.parquet',\n", | |
| " engine='pyarrow',\n", | |
| " use_deprecated_int96_timestamps=True,\n", | |
| " # allow_truncated_timestamps=True,\n", | |
| " index=False)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 35, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "ID int64[pyarrow]\n", | |
| "USER_ID int64[pyarrow]\n", | |
| "ORDER_DATE datetime64[ns]\n", | |
| "STATUS string[pyarrow]\n", | |
| "dtype: object\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>ID</th>\n", | |
| " <th>USER_ID</th>\n", | |
| " <th>ORDER_DATE</th>\n", | |
| " <th>STATUS</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>1</td>\n", | |
| " <td>1</td>\n", | |
| " <td>2018-01-01</td>\n", | |
| " <td>returned</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>2</td>\n", | |
| " <td>3</td>\n", | |
| " <td>2018-01-02</td>\n", | |
| " <td>completed</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>3</td>\n", | |
| " <td>94</td>\n", | |
| " <td>2018-01-04</td>\n", | |
| " <td>completed</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>4</td>\n", | |
| " <td>50</td>\n", | |
| " <td>2018-01-05</td>\n", | |
| " <td>completed</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>5</td>\n", | |
| " <td>64</td>\n", | |
| " <td>2018-01-05</td>\n", | |
| " <td>completed</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>...</th>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>94</th>\n", | |
| " <td>95</td>\n", | |
| " <td>27</td>\n", | |
| " <td>2018-04-04</td>\n", | |
| " <td>placed</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>95</th>\n", | |
| " <td>96</td>\n", | |
| " <td>90</td>\n", | |
| " <td>2018-04-06</td>\n", | |
| " <td>placed</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>96</th>\n", | |
| " <td>97</td>\n", | |
| " <td>89</td>\n", | |
| " <td>2018-04-07</td>\n", | |
| " <td>placed</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>97</th>\n", | |
| " <td>98</td>\n", | |
| " <td>41</td>\n", | |
| " <td>2018-04-07</td>\n", | |
| " <td>placed</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>98</th>\n", | |
| " <td>99</td>\n", | |
| " <td>85</td>\n", | |
| " <td>2018-04-09</td>\n", | |
| " <td>placed</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>99 rows × 4 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " ID USER_ID ORDER_DATE STATUS\n", | |
| "0 1 1 2018-01-01 returned\n", | |
| "1 2 3 2018-01-02 completed\n", | |
| "2 3 94 2018-01-04 completed\n", | |
| "3 4 50 2018-01-05 completed\n", | |
| "4 5 64 2018-01-05 completed\n", | |
| ".. .. ... ... ...\n", | |
| "94 95 27 2018-04-04 placed\n", | |
| "95 96 90 2018-04-06 placed\n", | |
| "96 97 89 2018-04-07 placed\n", | |
| "97 98 41 2018-04-07 placed\n", | |
| "98 99 85 2018-04-09 placed\n", | |
| "\n", | |
| "[99 rows x 4 columns]" | |
| ] | |
| }, | |
| "execution_count": 35, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df_new = pd.read_parquet('https://dbtlabsynapsedatalake.blob.core.windows.net/dbt-quickstart-public/jaffle_shop_orders.parquet')\n", | |
| "print(df_new.dtypes)\n", | |
| "df_new" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python (dev)", | |
| "language": "python", | |
| "name": "dev" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.11.5" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment