Created
November 23, 2018 16:08
-
-
Save korkridake/aa92696eaf96c9787c5deb4ba77f0f7d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NOTE(review): `sc` and `spark` are not defined anywhere in this file; presumably
# they are handles injected by the hosting notebook environment. Confirm before
# running this as a standalone script, where these two lines raise NameError.
print(sc)
print(spark)
# <pyspark.sql.session.SparkSession at 0x7f8df8673ba8>
# -------------------------------------------------------------------------------
# Import PySpark Libraries
# -------------------------------------------------------------------------------
# Standard library
import datetime
import string
# NOTE: the next import rebinds the name `datetime` from the module to the class.
from datetime import datetime
from functools import reduce

# PySpark
from pyspark.sql import DataFrame
from pyspark.sql import Row
from pyspark.sql.functions import skewness, kurtosis
from pyspark.sql.functions import var_pop, var_samp, stddev, stddev_pop, sumDistinct, ntile
# 'udf' stands for 'user defined function', and is simply a wrapper for functions you write and
# want to apply to a column that knows how to iterate through pySpark dataframe columns. It should
# be more clear after we use it below.
from pyspark.sql.functions import udf
from pyspark.sql.functions import col
from pyspark.sql.types import IntegerType
from pyspark.sql.types import StringType
from pyspark.sql.types import DateType
# -------------------------------------------------------------------------------
# lambda <args>: <expr>
# -------------------------------------------------------------------------------
def f(x):
    '''
    Signature: int --> int
    Author: @Korkrid Akepanidtaworn
    Description: Square the value of a scalar, i.e. return x ** 2.
    '''
    return x ** 2


print(f(8))  # 64
# In Lambda way, we can write:
g = lambda x: x ** 2
print(g(8))  # 64
# As you can see, both functions do exactly the same thing and can be used in the same ways.
# - Note that the lambda definition does not include a "return" statement - it always
#   contains a single expression, whose value is returned.
# - Also note that you can put a lambda definition anywhere a function is expected, and
#   you don't have to assign it to a variable at all.
# - Lambda functions come from functional programming languages and the Lambda Calculus.
#   Since they are so small they may be written on a single line.
# - This is not exactly the same as lambda in functional programming languages, but it is
#   a very powerful concept that's well integrated into Python.

# NOTE: this rebinds the name `f`; from here on `f` takes two arguments and no
# longer refers to the squaring function defined with `def` earlier in the file.
f = lambda x, y: ["PASS", x, y] if x > 3 and y < 100 else ["FAIL", x, y]
print(f(4, 50))
# ['PASS', 4, 50]
def k(x, y):
    '''
    Signature: (int, int) --> list
    Author: @Korkrid Akepanidtaworn
    Description: Return ["PASS", x, y] if (x > 3 and y < 100), otherwise
                 return ["FAIL", x, y].
    '''
    # Logical `and` (not bitwise `&`) so the test short-circuits and matches
    # the equivalent lambda version of this check.
    if x > 3 and y < 100:
        return ["PASS", x, y]
    return ["FAIL", x, y]


print(k(4, 50))    # ['PASS', 4, 50]
print(k(60, 130))  # ['FAIL', 60, 130]
print(k(2, 150))   # ['FAIL', 2, 150]
# -------------------------------------------------------------------------------
# Challenge 1:
# Write a Lambda function and use it to sort pairs by key using their names. You will be
# using the list.sort() method of a list. It modifies the list in-place (here pairs) and
# has a key parameter to specify a function to be called on each list element prior to
# making comparisons. The value of the key parameter is a function that takes a single
# argument and returns a key to use for sorting purposes. Define this function as a
# Lambda function.
# -------------------------------------------------------------------------------
pairs = [(1, 'one'), (2, 'two'), (3, 'three'), (4, 'four'), (5, 'five'), (6, 'six'), (7, 'jk rowling')]

# Sort alphabetically by the name (second element of each tuple).
pairs.sort(key=lambda pair: pair[1])
pairs
# Out[20]:
# [(5, 'five'),
# (4, 'four'),
# (7, 'jk rowling'),
# (1, 'one'),
# (6, 'six'),
# (3, 'three'),
# (2, 'two')]

# Same key, descending order.
pairs.sort(key=lambda pair: pair[1], reverse=True)
pairs
# Out[27]:
# [(2, 'two'),
# (3, 'three'),
# (6, 'six'),
# (1, 'one'),
# (7, 'jk rowling'),
# (4, 'four'),
# (5, 'five')]

# Sort by the number (first element of each tuple), descending.
pairs.sort(key=lambda pair: pair[0], reverse=True)
pairs
# Out[25]:
# [(7, 'jk rowling'),
# (6, 'six'),
# (5, 'five'),
# (4, 'four'),
# (3, 'three'),
# (2, 'two'),
# (1, 'one')]

# Sort by the number, ascending; this restores the initial order.
pairs.sort(key=lambda pair: pair[0])
pairs
# [(1, 'one'),
# (2, 'two'),
# (3, 'three'),
# (4, 'four'),
# (5, 'five'),
# (6, 'six'),
# (7, 'jk rowling')]
# -------------------------------------------------------------------------------
# map, filter and reduce in python
# Map takes a function f and a sequence as input parameters and applies f to every
# element. In this respect, using map is equivalent to a for loop.
# For instance, to convert a list of temperatures in Celsius to a list of temperatures
# in Kelvin:
# -------------------------------------------------------------------------------
temp_c = [10, 3, -5, 25, 1, 9, 29, -10, 5]
temp_K = list(map(lambda x: x + 273.15, temp_c))
temp_K
# Out[28]: [283.15, 276.15, 268.15, 298.15, 274.15, 282.15, 302.15, 263.15, 278.15]

# map() is a function with two arguments:
# r = map(func, seq)
# The first argument func is a function and the second a sequence (e.g. a list) seq.
# map() applies the function func to all the elements of the sequence seq.
# In Python 3 it returns a lazy iterator rather than a list, which is why the result
# is wrapped in list(...) here.
# Let's define a list of words:
# list_words = ["big", "small", "able", "about", "hairdresser", "laboratory"]
# Use a map function to print the number of characters of each word:
list_words = ['big', 'small', 'able', 'about', 'hairdresser', 'laboratory']
list_num_char_for_each_word = list(map(lambda word: len(word), list_words))
print(list_num_char_for_each_word)  # [3, 5, 4, 5, 11, 10]
# or even shorter
print(list(map(len, list_words)))  # [3, 5, 4, 5, 11, 10]
# -------------------------------------------------------------------------------
# Filter
# -------------------------------------------------------------------------------
# As the name suggests, filter can be used to filter your data. It tests each element
# of your input data and returns the subset of it for which a condition given by a
# function is True. It does not modify your input data.
numbers = range(-15, 15)
less_than_zero = list(filter(lambda x: x < 0, numbers))
print(less_than_zero)  # [-15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1]

# Reuse numbers and extract all the odd numbers. filter() leaves `numbers`
# untouched, so it does not need to be re-created between uses.
odd_nums = list(filter(lambda x: x % 2 != 0, numbers))
print(odd_nums)  # [-15, -13, -11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9, 11, 13]

# Reuse numbers and extract all the even numbers:
even_numbers = list(filter(lambda x: x % 2 == 0, numbers))
print(even_numbers)  # [-14, -12, -10, -8, -6, -4, -2, 0, 2, 4, 6, 8, 10, 12, 14]
# -------------------------------------------------------------------------------
# Reduce
# -------------------------------------------------------------------------------
# Reduce takes a function f and a sequence as input. The function f takes two
# parameters. Reduce folds the sequence from left to right: f is applied to the first
# two elements, then to that result and the third element, and so on, until a single
# value remains. Let's take an example:
from functools import reduce

# we define a list of integers
numbers = [1, 4, 6, 2, 9, 10]


# Define a new function combine
def combine(x, y):
    '''
    Signature: (object, object) --> str
    Description: Convert x and y to strings and return the text "(x, y)".
                 The result is a string that looks like a tuple, not a tuple.
    '''
    return "(" + str(x) + ", " + str(y) + ")"


# Use reduce to apply combine to numbers
print(numbers)
reduce(combine, numbers)
# [1, 4, 6, 2, 9, 10]
# Out[38]: '(((((1, 4), 6), 2), 9), 10)'

# we define a list of integers
numbers = [1, 4, 6, 2, 9, 10]
# Use reduce to combine numbers, this time with the function written inline as a lambda
print(numbers)
reduce(lambda x, y: "(" + str(x) + ", " + str(y) + ")", numbers)
# [1, 4, 6, 2, 9, 10]
# Out[38]: '(((((1, 4), 6), 2), 9), 10)'
# -----------------------------------------------------------------------------
# Challenge 4:
# Let's define a string variable sentence:
sentence = "Dis-moi ce que tu manges, je te dirai ce que tu es."
# Compute the number of words in sentence
# -----------------------------------------------------------------------------
# Strip every punctuation character first (so "Dis-moi" becomes the single word "Dismoi").
no_punctuation = sentence.translate(str.maketrans("", "", string.punctuation))
# Map each word to 1 and add the ones up. The initial value 0 keeps reduce from
# raising TypeError when there are no words at all.
num_words = reduce(lambda x, y: x + y, map(lambda word: 1, no_punctuation.split()), 0)
num_words  # 12
def wordCount(mystring):
    '''
    Signature: str --> int
    Author: Darrell White (original implementation; see Link)
    Description: Return the number of whitespace-separated words in a sentence.
                 Runs of whitespace count as one separator, and leading or trailing
                 whitespace is ignored, so an empty or blank string has 0 words.
                 For backward compatibility, a non-string argument returns the
                 text "Not a string" instead of raising.
    Link: https://stackoverflow.com/questions/19410018/how-to-count-the-number-of-words-in-a-sentence-ignoring-numbers-punctuation-an
    '''
    if not isinstance(mystring, str):
        return "Not a string"
    # str.split() with no argument splits on whitespace runs and drops empty
    # pieces, so no manual space-tracking state is needed.
    return len(mystring.split())


mystring = "The ones who see things differently. They're not fond of rules. And they have no respect for the status quo."
print(wordCount(mystring))  # 20
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment