davidro · September 28, 2015 06:36
diff --git a/week2.py b/week2.py
 # -*- coding: utf-8 -*-
 """
 Created on Fri Sep 25 18:35:21 2015
 @author: david
 """
 import pandas as pd
 import numpy as np

 # Load the survey CSV 'addhealth_pds.csv' into the DataFrame `data`.
 # low_memory=False tells pandas not to parse the file in chunks, which avoids
 # mixed-type inference within a column.
 data = pd.read_csv('addhealth_pds.csv', low_memory=False)

 # Upper-case every column name so the lookups below can use upper-case keys.
 # A concrete list is assigned on purpose: on Python 3, map() returns a lazy,
 # one-shot iterator rather than a list of labels.
 data.columns = [name.upper() for name in data.columns]

 # Print floats in fixed-point '%f' notation (six decimals).
 # Original author's note: set as a workaround for display-format run-time errors.
 pd.set_option('display.float_format', lambda x: '%f' % x)


 # Report the size of the DataFrame: rows are observations, columns are variables.
 n_observations, n_variables = data.shape
 for line in (
     "\nIn DataSet there is:",
     "----------------------------------------------",
     "%s - observations" % n_observations,
     "%s - variables \n" % n_variables,
 ):
     print(line)


 # Examination of the independent variables' frequency distributions.
 def _print_frequencies(column, question):
     """Print and return the frequency tables for one survey variable.

     Reads ``data[column]`` from the module-level DataFrame ``data`` and
     prints, in order: a header naming ``column`` and its ``question`` text,
     the raw value counts, and the normalized value counts (relative
     frequencies). Both tables are sorted by their index, i.e. by answer value.

     Returns the pair ``(counts, percentages)`` as pandas Series.
     """
     print("\nVariable %s: %s" % (column, question))
     print("----------------------------------------------")

     print("\nCOUNTS:")
     counts = data[column].value_counts().sort_index()
     print(counts)

     print("\nPERCENTAGES:")
     percentages = data[column].value_counts(normalize=True).sort_index()
     print(percentages)

     return counts, percentages


 print("\nExemination of Independent Variables Frequency Distributions")
 print("============================================================================================")

 # One call per self-perception item. The c0..c6 / p0..p6 names are kept so
 # that any code relying on them keeps working.
 c0, p0 = _print_frequencies("H1PF30", "You have a lot of good qualities?")

 # Fixed: the H1PF31 percentages used to be computed from column H1PF32.
 c1, p1 = _print_frequencies("H1PF31", "You are physically fit?")

 # Fixed: this section used to be labelled "H1PF31" although it reports H1PF32.
 c2, p2 = _print_frequencies("H1PF32", "You have a lot to be proud of")

 c3, p3 = _print_frequencies("H1PF33", "You like your self just the way you are")
 c4, p4 = _print_frequencies("H1PF34", "You feel like you are doing everything just about right")
 c5, p5 = _print_frequencies("H1PF35", "You feel socially accepted")
 # The trailing space in the question text is kept so the printed header is unchanged.
 c6, p6 = _print_frequencies("H1PF36", "You feel loved and wanted ")


 # Examination of the dependent variable's frequency distribution.
 print("\nExemination of Dependent Variable Frequency Distributions")
 print("============================================================================================")

 # H1NR6: With how many people, in total, including romantic relationship
 # partners, have you ever had a sexual relationship?
 print("\nVariable H1NR6: With how many people, in total, including romantic relationship partners, have you ever had a sexual relationship?")
 print("----------------------------------------------")

 # Number of respondents per answer value, ordered by answer value.
 print("\nCOUNTS:")
 cd = data["H1NR6"].value_counts().sort_index()
 print(cd)

 # Relative frequency of each answer value (fractions, not multiplied by 100).
 # Bound to `pdep` rather than `pd`: the old name overwrote the pandas module
 # alias `pd`, so any later `pd.` call would have failed.
 print("\nPERCENTAGES:")
 pdep = data["H1NR6"].value_counts(normalize=True).sort_index()
 print(pdep)


 # Keep only the rows whose H1NR6 answer is one of the whole numbers 1..500
 # (respondents reporting at least one sexual relationship). Everything else is
 # dropped -- presumably 0 and the special answer codes; confirm against the codebook.
 print("\nSubset of DataSet,only people who had an sexsual relationship included")
 print("============================================================================================")
 sub1 = data[data['H1NR6'].isin(list(range(1, 501)))]
 partners = sub1["H1NR6"]

 # The relative-frequency table is shown once directly under the banner and
 # once more under the PERCENTAGES heading below.
 psd = partners.value_counts(normalize=True).sort_index()
 print(psd)

 print("\nFrequnecy distributions on subset of data")
 print("----------------------------------------------")

 # Respondents per answer value within the subset, ordered by answer value.
 csd = partners.value_counts().sort_index()
 print("\nCOUNTS:")
 print(csd)

 print("\nPERCENTAGES:")
 print(psd)
	# -*- coding: utf-8 -*-
	"""
	Created on Fri Sep 25 18:35:21 2015
	@author: david
	"""
	import pandas as pd
	import numpy as np

	# Load the survey CSV 'addhealth_pds.csv' into the DataFrame `data`.
	# low_memory=False tells pandas not to parse the file in chunks, which avoids
	# mixed-type inference within a column.
	data = pd.read_csv('addhealth_pds.csv', low_memory=False)

	# Upper-case every column name so the lookups below can use upper-case keys.
	# A concrete list is assigned on purpose: on Python 3, map() returns a lazy,
	# one-shot iterator rather than a list of labels.
	data.columns = [name.upper() for name in data.columns]

	# Print floats in fixed-point '%f' notation (six decimals).
	# Original author's note: set as a workaround for display-format run-time errors.
	pd.set_option('display.float_format', lambda x: '%f' % x)


	# Report the size of the DataFrame: rows are observations, columns are variables.
	n_observations, n_variables = data.shape
	for line in (
	    "\nIn DataSet there is:",
	    "----------------------------------------------",
	    "%s - observations" % n_observations,
	    "%s - variables \n" % n_variables,
	):
	    print(line)


	# Examination of the independent variables' frequency distributions.
	def _print_frequencies(column, question):
	    """Print and return the frequency tables for one survey variable.

	    Reads ``data[column]`` from the module-level DataFrame ``data`` and
	    prints, in order: a header naming ``column`` and its ``question`` text,
	    the raw value counts, and the normalized value counts (relative
	    frequencies). Both tables are sorted by their index, i.e. by answer value.

	    Returns the pair ``(counts, percentages)`` as pandas Series.
	    """
	    print("\nVariable %s: %s" % (column, question))
	    print("----------------------------------------------")

	    print("\nCOUNTS:")
	    counts = data[column].value_counts().sort_index()
	    print(counts)

	    print("\nPERCENTAGES:")
	    percentages = data[column].value_counts(normalize=True).sort_index()
	    print(percentages)

	    return counts, percentages


	print("\nExemination of Independent Variables Frequency Distributions")
	print("============================================================================================")

	# One call per self-perception item. The c0..c6 / p0..p6 names are kept so
	# that any code relying on them keeps working.
	c0, p0 = _print_frequencies("H1PF30", "You have a lot of good qualities?")

	# Fixed: the H1PF31 percentages used to be computed from column H1PF32.
	c1, p1 = _print_frequencies("H1PF31", "You are physically fit?")

	# Fixed: this section used to be labelled "H1PF31" although it reports H1PF32.
	c2, p2 = _print_frequencies("H1PF32", "You have a lot to be proud of")

	c3, p3 = _print_frequencies("H1PF33", "You like your self just the way you are")
	c4, p4 = _print_frequencies("H1PF34", "You feel like you are doing everything just about right")
	c5, p5 = _print_frequencies("H1PF35", "You feel socially accepted")
	# The trailing space in the question text is kept so the printed header is unchanged.
	c6, p6 = _print_frequencies("H1PF36", "You feel loved and wanted ")


	# Examination of the dependent variable's frequency distribution.
	print("\nExemination of Dependent Variable Frequency Distributions")
	print("============================================================================================")

	# H1NR6: With how many people, in total, including romantic relationship
	# partners, have you ever had a sexual relationship?
	print("\nVariable H1NR6: With how many people, in total, including romantic relationship partners, have you ever had a sexual relationship?")
	print("----------------------------------------------")

	# Number of respondents per answer value, ordered by answer value.
	print("\nCOUNTS:")
	cd = data["H1NR6"].value_counts().sort_index()
	print(cd)

	# Relative frequency of each answer value (fractions, not multiplied by 100).
	# Bound to `pdep` rather than `pd`: the old name overwrote the pandas module
	# alias `pd`, so any later `pd.` call would have failed.
	print("\nPERCENTAGES:")
	pdep = data["H1NR6"].value_counts(normalize=True).sort_index()
	print(pdep)


	# Keep only the rows whose H1NR6 answer is one of the whole numbers 1..500
	# (respondents reporting at least one sexual relationship). Everything else is
	# dropped -- presumably 0 and the special answer codes; confirm against the codebook.
	print("\nSubset of DataSet,only people who had an sexsual relationship included")
	print("============================================================================================")
	sub1 = data[data['H1NR6'].isin(list(range(1, 501)))]
	partners = sub1["H1NR6"]

	# The relative-frequency table is shown once directly under the banner and
	# once more under the PERCENTAGES heading below.
	psd = partners.value_counts(normalize=True).sort_index()
	print(psd)

	print("\nFrequnecy distributions on subset of data")
	print("----------------------------------------------")

	# Respondents per answer value within the subset, ordered by answer value.
	csd = partners.value_counts().sort_index()
	print("\nCOUNTS:")
	print(csd)

	print("\nPERCENTAGES:")
	print(psd)