Created
September 28, 2015 06:36
-
-
Save davidro/c8bf9b0d1df8b24d0997 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 25 18:35:21 2015
@author: david

Loads ``addhealth_pds.csv`` and prints frequency distributions (counts and
percentages) for the columns H1PF30-H1PF36 and H1NR6.
"""
import pandas as pd
import numpy as np

# read the data from csv and store it in a pandas DataFrame named data
data = pd.read_csv('addhealth_pds.csv', low_memory=False)

# upper-case all DataFrame column names; a real list is built because on
# Python 3 map() returns a lazy iterator, which older pandas releases reject
# when it is assigned as the column index
data.columns = [column.upper() for column in data.columns]

# print floats in plain fixed-point notation (the original author notes this
# display format avoids run time errors)
pd.set_option('display.float_format', lambda x: '%f' % x)

# report how many rows (observations) and columns (variables) the DataFrame has
print("\nIn DataSet there is:")
print("----------------------------------------------")
print("%s - observations" % len(data))
print("%s - variables \n" % len(data.columns))
# Examination of Independent Variables Frequency Distributions


def _report_frequencies(frame, column, question):
    """Print the count and percentage tables of one column and return them.

    Parameters
    ----------
    frame : DataFrame holding the column.
    column : name of the column to tabulate.
    question : text printed after the column name in the section heading.

    Returns
    -------
    (counts, percentages) : the two value_counts() results, each sorted by
    index so the answer codes appear in ascending order.
    """
    print("\nVariable %s: %s" % (column, question))
    print("----------------------------------------------")
    print("\nCOUNTS:")
    counts = frame[column].value_counts().sort_index()
    print(counts)
    print("\nPERCENTAGES:")
    # normalize=True turns the counts into relative frequencies
    percentages = frame[column].value_counts(normalize=True).sort_index()
    print(percentages)
    return counts, percentages


print("\nExemination of Independent Variables Frequency Distributions")
print("============================================================================================")

# One call per variable keeps the counts and the percentages tied to the same
# column (the percentages for H1PF31 used to be read from H1PF32, and the
# H1PF32 section used to be headed "H1PF31").
c0, p0 = _report_frequencies(data, "H1PF30", "You have a lot of good qualities?")
c1, p1 = _report_frequencies(data, "H1PF31", "You are physically fit?")
c2, p2 = _report_frequencies(data, "H1PF32", "You have a lot to be proud of")
c3, p3 = _report_frequencies(data, "H1PF33", "You like your self just the way you are")
c4, p4 = _report_frequencies(data, "H1PF34", "You feel like you are doing everything just about right")
c5, p5 = _report_frequencies(data, "H1PF35", "You feel socially accepted")
c6, p6 = _report_frequencies(data, "H1PF36", "You feel loved and wanted ")
# Examination of Dependent Variable Frequency Distributions
print("\nExemination of Dependent Variable Frequency Distributions")
print("============================================================================================")

# H1NR6: With how many people, in total, including romantic relationship
# partners, have you ever had a sexual relationship?
print("\nVariable H1NR6: With how many people, in total, including romantic relationship partners, have you ever had a sexual relationship?")
print("----------------------------------------------")
print("\nCOUNTS:")
cd = data["H1NR6"].value_counts().sort_index()
print(cd)
print("\nPERCENTAGES:")
# deliberately not named "pd": that would rebind the pandas module alias and
# break every later pd.* call in the script
pdep = data["H1NR6"].value_counts(normalize=True).sort_index()
print(pdep)
# Subset of the data set that keeps only people who had a sexual relationship:
# only H1NR6 answers from 1 to 500 are included (the range is converted to a
# list and matched with the pandas .isin function)
print("\nSubset of DataSet,only people who had an sexsual relationship included")
print("============================================================================================")
sub1 = data[data['H1NR6'].isin(list(range(1, 501)))]

# the percentage table is computed once; it is printed right under the subset
# heading and again under PERCENTAGES below
psd = sub1["H1NR6"].value_counts(normalize=True).sort_index()
print(psd)

print("\nFrequnecy distributions on subset of data")
print("----------------------------------------------")
print("\nCOUNTS:")
csd = sub1["H1NR6"].value_counts().sort_index()
print(csd)
print("\nPERCENTAGES:")
print(psd)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment