Skip to content

Instantly share code, notes, and snippets.

import pandas as pd
import json
# Read the addresses CSV from its S3 location into a DataFrame.
df = pd.read_csv('s3://airetail/bronze/sfleads/addresses/Addresses.csv')

# Every `address` cell is passed through json.loads, so the column ends up
# holding decoded Python objects instead of the raw text read from the CSV.
df['address'] = df['address'].apply(json.loads)

# `phone_numbers` is decoded the same way, except that cells pandas reports
# as null are replaced with None rather than being handed to json.loads.
df['phone_numbers'] = df['phone_numbers'].apply(
    lambda raw: None if pd.isnull(raw) else json.loads(raw)
)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

Instructions

Follow these steps, in order, to run the code snippets that solve the problem:

  • Initialization Process
  • Run SQL Query
  • Execute Python Script
id first_name last_name email gender ip_address
1 Roderic Cristofolo [email protected] Male 216.22.180.231
2 Dunstan Burchard [email protected] Male 18.135.76.159
3 Jeane Gronaver [email protected] Female 40.123.108.201
4 Rebe Tomczynski [email protected] Female 143.142.118.112
5 Mart Flament [email protected] Male 133.92.4.202
6 Wini Thomlinson [email protected] Female 54.248.170.235
7 Peria Bainbrigge [email protected] Female 68.67.104.142
8 Tillie Abbot [email protected] Female 21.114.196.35
9 Hilde Merkle [email protected] Female 72.118.37.139
{"id":1,"first_name":"Weidar","last_name":"Smyth","email":"[email protected]","gender":"Male","phone_numbers":["550-543-4729"],"addresses":[{"street_number":"344","street_name":"Hallows","city":"Ridgely","state":"Maryland","postal_code":"21684"}]}
{"id":2,"first_name":"Emmit","last_name":"Ogborn","email":"[email protected]","gender":"Male","phone_numbers":["374-344-7772","427-353-9104","349-982-6073"],"addresses":[{"street_number":"4","street_name":"Ryan","city":"Waco","state":"Texas","postal_code":"76796"},{"street_number":"7","street_name":"Holmberg","city":"Milwaukee","state":"Wisconsin","postal_code":"53205"}]}
{"id":3,"first_name":"Micah","last_name":"Dadswell","email":"[email protected]","gender":"Male","phone_numbers":["846-266-0132","231-711-7352"],"addresses":[{"street_number":"3","street_name":"Buena Vista","city":"Fort Worth","state":"Texas","postal_code":"76162"},{"street_number":"9865","street_name":"Iowa","city":"New York City","state":"New York","postal_code":"10009"},{"street_number":"6329
import pandas as pd
# Reading order_items
order_items_path = "/Users/itversity/Research/data/retail_db/order_items/part-00000"
order_items_schema = [
"order_item_id",
"order_item_order_id",
"order_item_product_id",
"order_item_quantity",
orders_path = "/Users/itversity/Research/data/retail_db/orders/part-00000"
orders_schema = [
"order_id",
"order_date",
"order_customer_id",
"order_status"
]
orders = pd.read_csv(orders_path,
header=None,
orders_path = "/Users/itversity/Research/data/retail_db/orders/part-00000"
orders_schema = [
"order_id",
"order_date",
"order_customer_id",
"order_status"
]
orders = pd.read_csv(orders_path,
header=None,
names=orders_schema