# Read the source tables in Parquet format
sales_table = spark.read.parquet("./data/sales_parquet")
'''
-- Create a temporary table with a few renamings
CREATE TABLE temp_1 AS
SELECT seller_id AS the_seller,
       num_pieces_sold AS pieces,
       product_id
FROM sales_table;
'''
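# A possible DataFrame-API translation of the SQL above (a sketch, assuming
# `spark` is an active session and `col` comes from pyspark.sql.functions):
# rename the columns with alias() and register the result as a temporary view.
temp_1 = sales_table.select(
    col("seller_id").alias("the_seller"),
    col("num_pieces_sold").alias("pieces"),
    col("product_id")
)
temp_1.createOrReplaceTempView("temp_1")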
# Read the source tables in Parquet format
sales_table = spark.read.parquet("./data/sales_parquet")
'''
SELECT product_id,
       SUM(num_pieces_sold) AS total_pieces_sold,
       AVG(num_pieces_sold) AS average_pieces_sold,
       MAX(num_pieces_sold) AS max_pieces_sold_of_product_in_orders,
       MIN(num_pieces_sold) AS min_pieces_sold_of_product_in_orders,
       COUNT(num_pieces_sold) AS num_times_product_sold
FROM sales_table
GROUP BY product_id
'''
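# A possible DataFrame-API translation of the SQL above (a sketch; the aggregate
# functions sum/avg/max/min/count are assumed to come from pyspark.sql.functions,
# imported with *): groupBy() on product_id, then agg() with one alias per metric.
aggregated_sales = sales_table.groupBy(col("product_id")).agg(
    sum("num_pieces_sold").alias("total_pieces_sold"),
    avg("num_pieces_sold").alias("average_pieces_sold"),
    max("num_pieces_sold").alias("max_pieces_sold_of_product_in_orders"),
    min("num_pieces_sold").alias("min_pieces_sold_of_product_in_orders"),
    count("num_pieces_sold").alias("num_times_product_sold")
)
aggregated_sales.show(5)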
# Read the source tables in Parquet format
sales_table = spark.read.parquet("./data/sales_parquet")
'''
SELECT order_id AS the_order_id,
       seller_id AS the_seller_id,
       num_pieces_sold AS the_number_of_pieces_sold
FROM sales_table
'''
# Execution Plan and show action in one line
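# A possible translation of the SQL above (a sketch, assuming `col` comes from
# pyspark.sql.functions): chain select() with the aliases and the show() action
# in a single expression.
sales_table.select(
    col("order_id").alias("the_order_id"),
    col("seller_id").alias("the_seller_id"),
    col("num_pieces_sold").alias("the_number_of_pieces_sold")
).show()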
# Read the source tables in Parquet format
sales_table = spark.read.parquet("./data/sales_parquet")
'''
SELECT *
FROM sales_table
'''
# Execution Plan
sales_table_execution_plan = sales_table.select(col("*"))
# Show (Action) - Showing 5 rows with no limit on column width
sales_table_execution_plan.show(5, False)
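# Optionally (a sketch): explain() prints the plan Spark built for the select()
# transformation above, before any action materializes it.
sales_table_execution_plan.explain()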
# Import Spark
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
# Initialize the Spark session
spark = SparkSession.builder \
.master("local") \
.appName("SparkLikeABoss") \
.getOrCreate()
// Import Spark
import org.apache.spark.sql._
import org.apache.spark.sql.functions._
// Create Spark Session
val spark = SparkSession.builder
  .master("local")
  .appName("spark session example")
  .getOrCreate()
import pandas as pd
import enchant
import nltk
from tqdm import tqdm
from nltk.corpus import stopwords
from nltk.tokenize import RegexpTokenizer
# Declare regex tokenizers: one matching punctuation (any character that is
# neither whitespace nor alphanumeric) and one matching digits
punctuation_tokenizer = RegexpTokenizer(r'[^\sA-Za-z0-9]')
numbers_tokenizer = RegexpTokenizer(r'[0-9]')
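# A minimal usage sketch (the sample sentence is made up): list the punctuation
# marks and the digits found in a piece of text.
sample_text = "Spark 3.0 is great, isn't it?"
print(punctuation_tokenizer.tokenize(sample_text))  # ['.', ',', "'", '?']
print(numbers_tokenizer.tokenize(sample_text))      # ['3', '0']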
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input
import numpy as np
import os
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from PIL import Image
import pandas as pd
import pickle
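# A possible way to combine the imports above (a sketch; 'example.jpg' is a
# placeholder path): extract a 2048-dim ResNet50 feature vector for one image.
# Feature vectors from many images would then be stacked before fitting PCA/t-SNE.
model = ResNet50(weights='imagenet', include_top=False, pooling='avg')

img = image.load_img('example.jpg', target_size=(224, 224))
batch = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))
features = model.predict(batch)  # shape (1, 2048)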
%%javascript
// Build the chart selector and its controls inside this output cell
// (buildChartSelect / buildControls are custom helpers defined elsewhere)
window.buildChartSelect(element)
window.buildControls(element)