# Reading a text file
# Steps:
filename = 'huck_finn.txt'
file = open(filename, mode='r')
text = file.read()
print(text)
print(file.closed)  # returns a boolean indicating whether the file is closed
file.close()  # closes the file
# It is good practice to use a context manager, since it closes the file
# automatically:
with open('huck_finn.txt', 'r') as file:
    print(file.read())

# Read & print the first 3 lines
with open('moby_dick.txt') as file:
    print(file.readline())
    print(file.readline())
    print(file.readline())
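# A common alternative (not part of the original exercise): iterating over the
# file object itself reads lazily, one line at a time, which is useful for
# files too large to fit in memory. Assumes 'moby_dick.txt' is in the working
# directory, as above.
with open('moby_dick.txt') as file:
    for line in file:
        print(line, end='')  # each line keeps its trailing newline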
# Flat files are text files containing records, i.e. row-column data, such as .txt and .csv files.
# Relational databases are not flat files.
######################################### Using numpy to import flat files ############################################
# For an all-numeric dataset, we can use numpy's "loadtxt" function.
# The first argument is the filename.
# The second is the delimiter, which, in this case, is a comma.

# Import packages
import numpy as np
import matplotlib.pyplot as plt

# Assign filename to variable: file
file = 'digits.csv'

# Load file as array: digits
digits = np.loadtxt(file, delimiter=',')

# Print datatype of digits
print(type(digits))

# Select and reshape a row (column 0 holds the digit label; the rest are the
# 784 pixel values of a 28x28 image)
im = digits[21, 1:]
im_sq = np.reshape(im, (28, 28))

# Plot reshaped data
plt.imshow(im_sq, cmap='Greys', interpolation='nearest')
plt.show()
################################################ Customizing your NumPy import ##########################################
# What if there are rows, such as a header, that you don't want to import? What if your file has a
# delimiter other than a comma? What if you only wish to import particular columns?
# np.loadtxt() takes a number of arguments that you'll find useful:
#   - delimiter: changes the delimiter loadtxt() is expecting; for example, ',' for
#     comma-delimited and '\t' for tab-delimited files;
#   - skiprows: how many rows (not indices) you wish to skip;
#   - usecols: a list of the indices of the columns you wish to keep.
# Import numpy
import numpy as np

# Assign the filename: file
file = 'digits_header.txt'

# Load the data: data
data = np.loadtxt(file, delimiter='\t', skiprows=1, usecols=[0, 2])

# Print data
print(data)
########################################## Importing different datatypes ###############################################
# The file seaslug.txt has a text header consisting of strings, and is tab-delimited.
# The data consist of the percentage of sea slug larvae that had metamorphosed in a given time period. Read more here:
# http://www.stat.ucla.edu/projects/datasets/seaslug-explanation.html
# Due to the header, if you tried to import it as-is using np.loadtxt(), Python would throw a ValueError telling you
# that it could not convert string to float. There are two ways to deal with this: first, you can set the data type
# argument dtype equal to str (for string).
# Alternatively, you can skip the first row, as we have seen before, using the skiprows argument.
# Assign filename: file
file = 'seaslug.txt'

# Import file: data
data = np.loadtxt(file, delimiter='\t', dtype=str)

# Print the first element of data
print(data[0])

# Import data as floats and skip the first row: data_float
data_float = np.loadtxt(file, delimiter='\t', dtype='float', skiprows=1)

# Print the 10th element of data_float
print(data_float[9])

# Plot a scatterplot of the data
plt.scatter(data_float[:, 0], data_float[:, 1])
plt.xlabel('time (min.)')
plt.ylabel('percentage of larvae')
plt.show()
##################################### Working with mixed datatypes (1) #######################################
# Much of the time you will need to import datasets which have different datatypes in different columns; one column
# may contain strings and another floats, for example. The function np.loadtxt() will freak at this. There is another
# function, np.genfromtxt(), which can handle such structures. If we pass dtype=None to it, it will figure out what
# types each column should be.
# Import 'titanic.csv' using the function np.genfromtxt() as follows:
data = np.genfromtxt('titanic.csv', delimiter=',', names=True, dtype=None)
# Here, the first argument is the filename, the second specifies the delimiter ',' and the third argument names tells
# us there is a header. Because the data are of different types, data is an object called a structured array. Because
# numpy arrays have to contain elements that are all the same type, the structured array solves this by being a 1D
# array, where each element of the array is a row of the flat file imported. You can test this by checking out the
# array's shape in the shell by executing np.shape(data), as in the sketch below.
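# A quick check (a sketch, assuming titanic.csv was loaded as above): the shape
# of a structured array is one-dimensional, (n_rows,), even though the file has
# many columns.
print(np.shape(data))    # e.g. (891,) for the standard Titanic training set
print(data.dtype.names)  # the column names parsed from the header row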
# Sample shell interactions:
# >>> data['Fare'][5]
# 8.4582999999999995
# >>> data['Fare'][0:5]
# array([  7.25  ,  71.2833,   7.925 ,  53.1   ,   8.05  ])
# Accessing rows and columns of structured arrays is super-intuitive: to get the ith row, merely execute data[i], and
# to get the column with name 'Fare', execute data['Fare'].
# Print the entire column with name Survived to the shell. What are the last 4 values of this column?
print(data['Survived'][-4:])
############################### Working with mixed datatypes (2) ###############################
# You have just used np.genfromtxt() to import data containing mixed datatypes. There is also another function,
# np.recfromcsv(), that behaves similarly to np.genfromtxt(), except that its default dtype is None (its default
# delimiter is ',' and names defaults to True), so those arguments need not be passed explicitly.

# Assign the filename: file
file = 'titanic.csv'

# Import file using np.recfromcsv: d
d = np.recfromcsv(file)

# Print out first three entries of d
print(d[:3])
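# Note (outside the original exercise): np.recfromcsv() has been deprecated in
# recent NumPy releases. A roughly equivalent sketch using np.genfromtxt() on
# the same file (unlike recfromcsv, genfromtxt keeps the header's original
# capitalization in the field names):
d2 = np.genfromtxt(file, delimiter=',', names=True, dtype=None, encoding='utf-8')
print(d2[:3])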
#################################### Using pandas to import flat files as DataFrames #######################################
# In the last exercise, you were able to import flat files containing columns with different datatypes as numpy
# arrays. However, the DataFrame object in pandas is a more appropriate structure in which to store such data and,
# thankfully, we can easily import files of mixed datatypes as DataFrames using the pandas functions read_csv() and
# read_table().

# Import pandas as pd
import pandas as pd

# Assign the filename: file
file = 'titanic.csv'

# Read the file into a DataFrame: df
df = pd.read_csv(file)

# View the head of the DataFrame
print(df.head())
# In the last exercise, you were able to import flat files into a pandas DataFrame. As a bonus, it is then
# straightforward to retrieve the corresponding numpy array using the attribute values.
# You'll now have a chance to do this using the MNIST dataset, which is available as digits.csv.

# Assign the filename: file
file = 'digits.csv'

# Read the first 5 rows of the file into a DataFrame: data
data = pd.read_csv(file, nrows=5, header=None)

# Build a numpy array from the DataFrame: data_array
data_array = np.array(data.values)

# Print the datatype of data_array to the shell
print(type(data_array))
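# Note (not part of the original exercise): in modern pandas, the recommended
# way to get the underlying numpy array is DataFrame.to_numpy() rather than
# the .values attribute:
data_array = data.to_numpy()
print(type(data_array))  # <class 'numpy.ndarray'>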
################################################# Customizing your pandas import ##########################################
# The pandas package is also great at dealing with many of the issues you will encounter when importing data as a
# data scientist, such as comments occurring in flat files, empty lines and missing values. Note that missing values
# are also commonly referred to as NA or NaN. To wrap up this chapter, you're now going to import a slightly
# corrupted copy of the Titanic dataset, titanic_corrupt.txt, which:
#   - contains comments after the character '#';
#   - is tab-delimited;
#   - uses the string 'Nothing' for missing values.

# Import matplotlib.pyplot as plt
import matplotlib.pyplot as plt

# Assign filename: file
file = 'titanic_corrupt.txt'

# Import file: data
data = pd.read_csv(file, sep='\t', comment='#', na_values='Nothing')

# Print the head of the DataFrame
print(data.head())

# Plot 'Age' variable in a histogram
data[['Age']].hist()
plt.xlabel('Age (years)')
plt.ylabel('count')
plt.show()
######################################### Working with relational databases in Python ######################################
######## What are the tables in the database? #######
# In this exercise, you'll once again create an engine to connect to 'Chinook.sqlite'. Before you can get any data
# out of the database, however, you'll need to know what tables it contains!
# To this end, you'll save the table names to a list using the method table_names() on the engine, and then you will
# print the list.

# Import necessary module
from sqlalchemy import create_engine

# Create engine: engine
engine = create_engine('sqlite:///Chinook.sqlite')

# Save the table names to a list: table_names
table_names = engine.table_names()

# Print the table names to the shell
print(table_names)
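# Note (outside the original exercise): Engine.table_names() was deprecated in
# SQLAlchemy 1.4 and removed in 2.0. The current way to list tables is via the
# inspection interface; a minimal sketch, assuming the same engine:
from sqlalchemy import inspect
print(inspect(engine).get_table_names())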
############################## The Hello World of SQL Queries! ##############################
# Now, it's time for liftoff! In this exercise, you'll perform the Hello World of SQL queries, SELECT, in order to
# retrieve all columns of the table Album in the Chinook database. Recall that the query SELECT * selects all columns.

# Import packages
from sqlalchemy import create_engine
import pandas as pd

# Create engine: engine
engine = create_engine('sqlite:///Chinook.sqlite')

# Open engine connection: con
con = engine.connect()

# Perform query: rs
rs = con.execute('SELECT * FROM Album')

# Save results of the query to DataFrame: df
df = pd.DataFrame(rs.fetchall())

# Close connection
con.close()

# Print head of DataFrame df
print(df.head())
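# Note (outside the original exercise): SQLAlchemy 2.0 no longer accepts raw
# query strings in Connection.execute(); they must be wrapped in text(). A
# minimal sketch of the same query under 2.0-style usage:
from sqlalchemy import text
with engine.connect() as con:
    rs = con.execute(text('SELECT * FROM Album'))
    df = pd.DataFrame(rs.fetchall(), columns=rs.keys())
print(df.head())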
############################ Customizing the Hello World of SQL Queries ############################################
# Congratulations on executing your first SQL query! Now you're going to figure out how to customize your query in
# order to:
#   - select specified columns from a table;
#   - select a specified number of rows;
#   - import column names from the database table.
# Recall that Hugo performed a very similar query customization in the video.

# Open engine in context manager
# Perform query and save results to DataFrame: df
with engine.connect() as con:
    rs = con.execute('SELECT LastName, Title FROM Employee')
    df = pd.DataFrame(rs.fetchmany(size=3))
    df.columns = rs.keys()

# Print the length of the DataFrame df
print(len(df))

# Print the head of the DataFrame df
print(df.head())
############################### Filtering your database records using SQL's WHERE #############################
# You can now execute a basic SQL query to select records from any table in your database, and you can also perform
# simple query customizations to select particular columns and numbers of rows.
# There are a couple more standard SQL query chops that will aid you in your journey to becoming an SQL ninja.
# Let's say, for example, that you wanted to get all records from the Customer table of the Chinook database for
# which the Country is 'Canada'. You can do this very easily in SQL using a SELECT statement followed by a WHERE
# clause as follows:
#   SELECT * FROM Customer WHERE Country = 'Canada'
# In fact, you can filter any SELECT statement by any condition.

# Create engine: engine
engine = create_engine('sqlite:///Chinook.sqlite')

# Open engine in context manager
# Perform query and save results to DataFrame: df
with engine.connect() as con:
    rs = con.execute('SELECT * FROM Employee WHERE EmployeeId >= 6')
    df = pd.DataFrame(rs.fetchall())
    df.columns = rs.keys()

# Print the head of the DataFrame df
print(df.head())
######################################## Ordering your SQL records with ORDER BY ################################
# You can also order your SQL query results. For example, if you wanted to get all records from the Customer table
# of the Chinook database and order them in increasing order by the column SupportRepId, you could do so with the
# following query:
#   SELECT * FROM Customer ORDER BY SupportRepId
# In fact, you can order any SELECT statement by any column.
# In this interactive exercise, you'll select all records of the Employee table and order them in increasing order
# by the column BirthDate.
# Packages are already imported as follows:
#   import pandas as pd
#   from sqlalchemy import create_engine
# Get querying!

# Create engine: engine
engine = create_engine('sqlite:///Chinook.sqlite')

# Open engine in context manager
with engine.connect() as con:
    rs = con.execute('SELECT * FROM Employee ORDER BY BirthDate')
    df = pd.DataFrame(rs.fetchall())
    # Set the DataFrame's column names
    df.columns = rs.keys()

# Print head of DataFrame
print(df.head())
########################## Pandas and The Hello World of SQL Queries! #########################################
# Here, you'll take advantage of the power of pandas to write the results of your SQL query to a DataFrame in one
# swift line of Python code!
# You'll first import pandas and create the SQLite 'Chinook.sqlite' engine. Then you'll query the database to select
# all records from the Album table.
# Recall that to select all records from the Orders table in the Northwind database, Hugo executed the following
# command:
#   df = pd.read_sql_query("SELECT * FROM Orders", engine)

# Import packages
from sqlalchemy import create_engine
import pandas as pd

# Create engine: engine
engine = create_engine('sqlite:///Chinook.sqlite')

# Execute query and store records in DataFrame: df
df = pd.read_sql_query('SELECT * FROM Album', engine)

# Print head of DataFrame
print(df.head())

# Open engine in context manager
# Perform query and save results to DataFrame: df1
with engine.connect() as con:
    rs = con.execute('SELECT * FROM Album')
    df1 = pd.DataFrame(rs.fetchall())
    df1.columns = rs.keys()

# Confirm that both methods yield the same result: does df equal df1?
print(df.equals(df1))
########################################### Pandas for more complex querying ###############################################
# Here, you'll become more familiar with the pandas function read_sql_query() by using it to execute a more complex
# query: a SELECT statement followed by both a WHERE clause AND an ORDER BY clause.
# You'll build a DataFrame that contains the rows of the Employee table for which the EmployeeId is greater than or
# equal to 6, and you'll order these entries by BirthDate.

# Import packages
from sqlalchemy import create_engine
import pandas as pd

# Create engine: engine
engine = create_engine('sqlite:///Chinook.sqlite')

# Execute query and store records in DataFrame: df
df = pd.read_sql_query(
    'SELECT * FROM Employee WHERE EmployeeId >= 6 ORDER BY BirthDate', engine)

# Print head of DataFrame
print(df.head())
##################################### The power of SQL lies in relationships between tables: INNER JOIN ############################
# Here, you'll perform your first INNER JOIN! You'll be working with your favourite SQLite database, Chinook.sqlite.
# For each record in the Album table, you'll extract the Title along with the Name of the Artist. The latter will
# come from the Artist table, and so you will need to INNER JOIN these two tables on the ArtistID column of both.
# Recall that to INNER JOIN the Orders and Customers tables from the Northwind database, Hugo executed the following
# SQL query:
#   SELECT OrderID, CompanyName FROM Orders INNER JOIN Customers ON Orders.CustomerID = Customers.CustomerID
# The following code has already been executed to import the necessary packages and to create the engine:
#   import pandas as pd
#   from sqlalchemy import create_engine
#   engine = create_engine('sqlite:///Chinook.sqlite')

# Open engine in context manager
# Perform query and save results to DataFrame: df
with engine.connect() as con:
    rs = con.execute('SELECT Title, Name FROM Album INNER JOIN Artist ON Album.ArtistID = Artist.ArtistID')
    df = pd.DataFrame(rs.fetchall())
    df.columns = rs.keys()

# Print head of DataFrame df
print(df.head())
################################################### Filtering your INNER JOIN ###########################################
# Congrats on performing your first INNER JOIN! You're now going to finish this chapter with one final exercise in
# which you perform an INNER JOIN and filter the result using a WHERE clause.
# Recall that to INNER JOIN the Orders and Customers tables from the Northwind database, Hugo executed the following
# SQL query:
#   SELECT OrderID, CompanyName FROM Orders INNER JOIN Customers ON Orders.CustomerID = Customers.CustomerID
# The following code has already been executed to import the necessary packages and to create the engine:
#   import pandas as pd
#   from sqlalchemy import create_engine
#   engine = create_engine('sqlite:///Chinook.sqlite')

# Execute query and store records in DataFrame: df
df = pd.read_sql_query(
    'SELECT * FROM PlaylistTrack INNER JOIN Track ON PlaylistTrack.TrackId = Track.TrackId WHERE Milliseconds < 250000',
    engine)

# Print head of DataFrame
print(df.head())