Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save korkridake/aa92696eaf96c9787c5deb4ba77f0f7d to your computer and use it in GitHub Desktop.
Save korkridake/aa92696eaf96c9787c5deb4ba77f0f7d to your computer and use it in GitHub Desktop.
# NOTE(review): `sc` and `spark` are not defined anywhere in this file; presumably they are
# the SparkContext and SparkSession objects pre-created by the notebook/shell — TODO confirm.
# Run outside such an environment, these two lines raise NameError.
print(sc)
print(spark)
# <pyspark.sql.session.SparkSession at 0x7f8df8673ba8>
# -------------------------------------------------------------------------------
# Import PySpark Libraries
# -------------------------------------------------------------------------------
import datetime
from datetime import datetime
from pyspark.sql.functions import skewness, kurtosis
from pyspark.sql.functions import var_pop, var_samp, stddev, stddev_pop, sumDistinct, ntile
# 'udf' stands for 'user-defined function'. It wraps a plain Python function that you write
# so that it can be applied to the values of a PySpark DataFrame column. This should become
# clearer once we use it below.
from pyspark.sql.functions import udf
from pyspark.sql.functions import col
from pyspark.sql.types import IntegerType
from pyspark.sql.types import StringType
from pyspark.sql.types import DateType
from pyspark.sql import DataFrame
from pyspark.sql import Row
from functools import reduce
# -------------------------------------------------------------------------------
# lambda <args>: <expr>
# -------------------------------------------------------------------------------
def f(x):
    '''
    Signature: int --> int
    Author: @Korkrid Akepanidtaworn
    Description: Square the value of a scalar (return x raised to the power 2).
    '''
    return x**2
print(f(8))  # 64
# The same computation written as a lambda expression bound to a name:
g = lambda x: x ** 2
print(g(8))  # 64
# As you can see both functions do exactly the same and can be used in the same ways.
# - Note that the lambda definition does not include a “return” statement – it always contains a single expression which is returned.
# - Also note that you can put a lambda definition anywhere a function is expected, and you don’t have to assign it to a variable at all.
# - Lambda functions come from functional programming languages and the Lambda Calculus. Since they are so small they may be written on a single line.
# - This is not exactly the same as lambda in functional programming languages, but it is a very powerful concept that’s well integrated into Python.
# Only the label depends on the condition; x and y are echoed back in both cases.
f = lambda x, y: ["PASS" if (x > 3 and y < 100) else "FAIL", x, y]
print(f(4, 50))
# ['PASS', 4, 50]
def k(x, y):
    '''
    Signature: (int, int) --> list
    Author: @Korkrid Akepanidtaworn
    Description: Return ["PASS", x, y] if (x > 3 and y < 100), otherwise return ["FAIL", x, y].
    '''
    # Use the boolean operator `and` rather than bitwise `&`: it short-circuits and
    # does not depend on the parentheses to get the precedence right.
    if x > 3 and y < 100:
        return ["PASS", x, y]
    return ["FAIL", x, y]
# One passing pair followed by two failing pairs, printed one result per line.
print(*(k(a, b) for a, b in [(4, 50), (60, 130), (2, 150)]), sep="\n")
# ['PASS', 4, 50]
# ['FAIL', 60, 130]
# ['FAIL', 2, 150]
# -------------------------------------------------------------------------------
# Challenge 1:
# Write a Lambda function and use it to sort pairs by key using their names. You will be using the list.sort() method of a list. It modifies the list in-place (here, pairs) and
# has a key parameter to specify a function to be called on each list element prior to making comparisons. The value of the key parameter is a function that takes
# a single argument and returns a key to use for sorting purposes. Define this function as a Lambda function.
# -------------------------------------------------------------------------------
pairs = [
    (1, 'one'),
    (2, 'two'),
    (3, 'three'),
    (4, 'four'),
    (5, 'five'),
    (6, 'six'),
    (7, 'jk rowling'),
]

# Ascending by name (the second element of each tuple).
pairs.sort(key=lambda p: p[1])
pairs
# Out[20]:
# [(5, 'five'),
#  (4, 'four'),
#  (7, 'jk rowling'),
#  (1, 'one'),
#  (6, 'six'),
#  (3, 'three'),
#  (2, 'two')]

# Descending by name.
pairs.sort(key=lambda p: p[1], reverse=True)
pairs
# Out[27]:
# [(2, 'two'),
#  (3, 'three'),
#  (6, 'six'),
#  (1, 'one'),
#  (7, 'jk rowling'),
#  (4, 'four'),
#  (5, 'five')]

# Descending by number (the first element of each tuple).
pairs.sort(key=lambda p: p[0], reverse=True)
pairs
# Out[25]:
# [(7, 'jk rowling'),
#  (6, 'six'),
#  (5, 'five'),
#  (4, 'four'),
#  (3, 'three'),
#  (2, 'two'),
#  (1, 'one')]

# Ascending by number, which restores the original order.
pairs.sort(key=lambda p: p[0])
pairs
# [(1, 'one'),
#  (2, 'two'),
#  (3, 'three'),
#  (4, 'four'),
#  (5, 'five'),
#  (6, 'six'),
#  (7, 'jk rowling')]
# -------------------------------------------------------------------------------
# map, filter and reduce in python
# Map takes a function f and an array as input parameters and outputs an array where f is applied to every element. In this respect, using map is equivalent to for loops.
# For instance, to convert a list of temperatures in Celsius to a list of temperature in Kelvin:
# -------------------------------------------------------------------------------
temp_c = [10, 3, -5, 25, 1, 9, 29, -10, 5]
# map() is lazy in Python 3, so wrap it in list() to materialise the converted values.
temp_K = list(map(lambda celsius: celsius + 273.15, temp_c))
temp_K
# Out[28]: [283.15, 276.15, 268.15, 298.15, 274.15, 282.15, 302.15, 263.15, 278.15]
# map() is a function with two arguments:
# r = map(func, seq)
# The first argument func is the name of a function and the second a sequence (e.g. a list) seq. map() applies the function func to all the elements of the sequence seq.
# In Python 3 it returns a lazy iterator over the elements changed by func; wrap it in list() to get a new list.
# Let's define a list of words: list_words = ["big", "small", "able", "about", "hairdresser", "laboratory"]
# Use a map function to print the number of characters of each word:
list_words = ['big', 'small', 'able', 'about', 'hairdresser', 'laboratory']
# Map every word to its length using an explicit lambda.
list_num_char_for_each_word = list(map(lambda word: len(word), list_words))
print(list_num_char_for_each_word)  # [3, 5, 4, 5, 11, 10]
# or even shorter: pass the built-in len directly instead of wrapping it in a lambda
print(list(map(len, list_words)))  # [3, 5, 4, 5, 11, 10]
# -------------------------------------------------------------------------------
# Filter
# -------------------------------------------------------------------------------
# As the name suggests, filter can be used to filter your data. It tests each element of your input data and
# returns a subset of it for which a condition given by a function is TRUE. It does not modify your input data.
# A range can be iterated any number of times, so a single `numbers` serves all three filters.
numbers = range(-15, 15)

# Keep only the negative values.
less_than_zero = list(filter(lambda n: n < 0, numbers))
print(less_than_zero)  # [-15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1]

# Reuse numbers and extract all the odd numbers:
odd_nums = list(filter(lambda n: n % 2 != 0, numbers))
print(odd_nums)  # [-15, -13, -11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9, 11, 13]

# Reuse numbers and extract all the even numbers:
even_numbers = list(filter(lambda n: n % 2 == 0, numbers))
print(even_numbers)  # [-14, -12, -10, -8, -6, -4, -2, 0, 2, 4, 6, 8, 10, 12, 14]
# -------------------------------------------------------------------------------
# Reduce
# -------------------------------------------------------------------------------
# Reduce takes a function f and a sequence as input. The function f takes two arguments: the result accumulated so far and the next element of the sequence.
# Reduce applies f cumulatively from left to right, collapsing the sequence into a single value. Let’s take an example:
# we define a list of integers
numbers = [1, 4, 6, 2, 9, 10]


def combine(x, y):
    '''
    Signature: (any, any) --> str
    Description: Convert x and y to strings and return the text "(x, y)".
    The result is a string that looks like a tuple, not an actual tuple.
    '''
    return "(" + str(x) + ", " + str(y) + ")"


# Use reduce to apply combine to numbers. Each intermediate result becomes the
# left argument of the next call, which is why the parentheses nest to the left.
from functools import reduce
print(numbers)
reduce(combine, numbers)
# [1, 4, 6, 2, 9, 10]
# Out[38]: '(((((1, 4), 6), 2), 9), 10)'
# we define a list of integers
numbers = [1, 4, 6, 2, 9, 10]
# Same fold as before, with the combining step written inline as a lambda.
from functools import reduce
print(numbers)
reduce(lambda left, right: "(" + str(left) + ", " + str(right) + ")", numbers)
# [1, 4, 6, 2, 9, 10]
# Out[38]: '(((((1, 4), 6), 2), 9), 10)'
# -----------------------------------------------------------------------------
# Challenge 4:
# Let’s define a string variable sentence:
sentence = "Dis-moi ce que tu manges, je te dirai ce que tu es."
# Compute the number of words in sentence
# -----------------------------------------------------------------------------
import string
# Delete every punctuation character, then count the words by mapping each one to 1
# and summing the ones with reduce.
no_punctuation = sentence.translate(str.maketrans("", "", string.punctuation))
reduce(lambda total, one: total + one, map(lambda _word: 1, no_punctuation.split()))  # 12
def wordCount(mystring):
    '''
    Signature: str --> int
    Author: Darrell White
    Description: Return the number of whitespace-separated words in a sentence.
    An empty or whitespace-only string contains 0 words. If the argument is not
    a str, the text "Not a string" is returned instead of raising, which keeps
    the error convention of the original implementation.
    Link: https://stackoverflow.com/questions/19410018/how-to-count-the-number-of-words-in-a-sentence-ignoring-numbers-punctuation-an
    '''
    if not isinstance(mystring, str):
        return "Not a string"
    # str.split() with no arguments splits on runs of any whitespace and ignores
    # leading/trailing whitespace, so "", " a" and "a " are all counted correctly.
    return len(mystring.split())
# Three sentences, 20 words in total.
mystring = (
    "The ones who see things differently. "
    "They're not fond of rules. "
    "And they have no respect for the status quo."
)
print(wordCount(mystring))  # 20
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment