fijiaaron · August 2, 2019 00:18
diff --git a/python_data_science_lesson_2.py b/python_data_science_lesson_2.py
 # Python Data Science lesson 2

 # strings
 greeting = "Hello, World!"
 print(greeting)

 # a string is a sequence of characters, so it can be indexed
 print(greeting[0]) #=> 'H'

 # indexing starts at 0, so index 5 is the 6th character
 print(greeting[5]) #=> ','

 # len() returns the length of a string; print it, because a bare
 # expression only echoes its value in the interactive interpreter
 print(len(greeting)) #=> 13

 # lists
 # NOTE: the name `list` shadows the built-in list type; it is kept
 # because the rest of this lesson refers to it.
 list = [1, 2, 3]

 # lists are zero indexed too
 print(list[0]) #=> 1

 # len() can also return the length of a list; print it, because a bare
 # expression only echoes its value in the interactive interpreter
 print(len(list)) #=> 3

 # lists can include different types of values
 list = [1, 2, 'apple']
 print(list)

 # you can refer to an individual item in a list
 print(list[0]) #=> 1
 print(list[1]) #=> 2
 print(list[2]) #=> 'apple'

 # you can slice a list
 print(list[0:2]) #=> [1, 2]

 # the end index of a slice is exclusive, so 0:3 covers indexes 0, 1 and 2
 print(list[0:3]) #=> [1, 2, 'apple']

 # you can add items to the end of a list
 list.append('banana')
 print(list) #=> [1, 2, 'apple', 'banana']

 # you can insert an item at a certain position
 list.insert(2, 'banana')
 print(list) #=> [1, 2, 'banana', 'apple', 'banana']

 # you can iterate through a list; the loop variable takes each value in turn
 for item in list:
     print(item)

 # and conditionally modify a list
 # range(len(list)) is evaluated once, before the first iteration, so the
 # loop only visits the indexes that existed when it started
 for i in range(len(list)):
     print(i, list[i])
     if list[i] == 'banana':
         list.insert(i + 1, 'orange')

 # it doesn't add 'orange' after the last 'banana': each insert makes the list
 # longer, but the range was already calculated from the original length

 ## Homework 1: Correct this loop to add orange after both instances of banana
 ## don't just correct it by doing this:
 list.append('orange')

 ## Homework: Once you have a list that looks like this:
 ## [1, 2, 'banana', 'orange', 'apple', 'banana', 'orange']
 ## create a function that removes bananas from the list, so it looks like this:
 ## [1, 2, 'orange', 'apple', 'orange']

 #### use enumerate to return both the index and an item

 # Collect the items to keep instead of calling list.pop(i) inside the loop:
 # removing an element while iterating shifts the later items left, so the
 # item that follows each removed 'banana' would be skipped.
 kept = []
 for i, item in enumerate(list):
     print(i, item)
     if str(item) == 'banana':
         print('removing banana at ', i)
     else:
         kept.append(item)
 # slice assignment updates the existing list object in place
 list[:] = kept

 # i, item is a tuple
 # a tuple is an ordered, immutable sequence of related items
 # Think of it like a row in a database

 # Talk about items() for a dictionary (iteritems() exists only in Python 2) and how it relates to enumerate()

 # Talk about the difference between tabular data that is row based vs columnar based
 # Row based is good for finding a single record: 
 # SQL databases are typically row based
 # 	select name, age, weight from people;

 # Column based is good for doing analytics over all records
 #   tell me the percentage of people who are overweight
 #	with the definition of height < 72 inches and weight > 200 pounds

 # You have to scan every row and aggregate with SQL, but you only have to load a single column to get all people over 200lb

 # That's why data structures like Pandas DataFrame are so useful
 # as opposed to looping through all records
 # DataFrame is like a pivot table in a spreadsheet so that you can look at data either way, more efficiently
	# Python Data Science lesson 2

	# strings
	greeting = "Hello, World!"
	print(greeting)

	# a string is a sequence of characters, so it can be indexed
	print(greeting[0]) #=> 'H'

	# indexing starts at 0, so index 5 is the 6th character
	print(greeting[5]) #=> ','

	# len() returns the length of a string; print it, because a bare
	# expression only echoes its value in the interactive interpreter
	print(len(greeting)) #=> 13

	# lists
	# NOTE: the name `list` shadows the built-in list type; it is kept
	# because the rest of this lesson refers to it.
	list = [1, 2, 3]

	# lists are zero indexed too
	print(list[0]) #=> 1

	# len() can also return the length of a list; print it, because a bare
	# expression only echoes its value in the interactive interpreter
	print(len(list)) #=> 3

	# lists can include different types of values
	list = [1, 2, 'apple']
	print(list)

	# you can refer to an individual item in a list
	print(list[0]) #=> 1
	print(list[1]) #=> 2
	print(list[2]) #=> 'apple'

	# you can slice a list
	print(list[0:2]) #=> [1, 2]

	# the end index of a slice is exclusive, so 0:3 covers indexes 0, 1 and 2
	print(list[0:3]) #=> [1, 2, 'apple']

	# you can add items to the end of a list
	list.append('banana')
	print(list) #=> [1, 2, 'apple', 'banana']

	# you can insert an item at a certain position
	list.insert(2, 'banana')
	print(list) #=> [1, 2, 'banana', 'apple', 'banana']

	# you can iterate through a list; the loop variable takes each value in turn
	for item in list:
		print(item)

	# and conditionally modify a list
	# range(len(list)) is evaluated once, before the first iteration, so the
	# loop only visits the indexes that existed when it started
	for i in range(len(list)):
		print(i, list[i])
		if list[i] == 'banana':
			list.insert(i + 1, 'orange')

	# it doesn't add 'orange' after the last 'banana': each insert makes the list
	# longer, but the range was already calculated from the original length

	## Homework 1: Correct this loop to add orange after both instances of banana
	## don't just correct it by doing this:
	list.append('orange')

	## Homework: Once you have a list that looks like this:
	## [1, 2, 'banana', 'orange', 'apple', 'banana', 'orange']
	## create a function that removes bananas from the list, so it looks like this:
	## [1, 2, 'orange', 'apple', 'orange']

	#### use enumerate to return both the index and an item

	# Collect the items to keep instead of calling list.pop(i) inside the loop:
	# removing an element while iterating shifts the later items left, so the
	# item that follows each removed 'banana' would be skipped.
	kept = []
	for i, item in enumerate(list):
		print(i, item)
		if str(item) == 'banana':
			print('removing banana at ', i)
		else:
			kept.append(item)
	# slice assignment updates the existing list object in place
	list[:] = kept

	# i, item is a tuple
	# a tuple is an ordered, immutable sequence of related items
	# Think of it like a row in a database

	# Talk about items() for a dictionary (iteritems() exists only in Python 2) and how it relates to enumerate()

	# Talk about the difference between tabular data that is row based vs columnar based
	# Row based is good for finding a single record:
	# SQL databases are typically row based
	# select name, age, weight from people;

	# Column based is good for doing analytics over all records
	# tell me the percentage of people who are overweight
	# with the definition of height < 72 inches and weight > 200 pounds

	# You have to scan every row and aggregate with SQL, but you only have to load a single column to get all people over 200lb

	# That's why data structures like Pandas DataFrame are so useful
	# as opposed to looping through all records
	# DataFrame is like a pivot table in a spreadsheet so that you can look at data either way, more efficiently