Skip to content

Instantly share code, notes, and snippets.

@judy-zz
Created April 23, 2009 15:41
Show Gist options
  • Save judy-zz/100559 to your computer and use it in GitHub Desktop.
Save judy-zz/100559 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
# Take the "combined.csv" file, and reorganize and annotate it into the
# finished data file that the group wants.
require 'date'
require 'logger'
require 'rubygems'
require 'fastercsv'
require 'progressbar'
require 'active_support'
INPUT_FILE = 'output/combined.csv'
OUTPUT_FILE = 'output/background_questionnaires.csv'
LOG_FILE = 'transform.log'
# Wrap FasterCSV::Row#[] so that every field lookup records, in the global
# $current_column, which column was asked for. The logging helper reads that
# global to tag each warning with the column being processed.
class FasterCSV
  class Row
    # Remember the requested column, then delegate to the original #[].
    #
    # index_or_header - the header name or numeric index being fetched.
    # extra           - any further arguments accepted by the original #[]
    #                   (FasterCSV documents an optional minimum index);
    #                   they are forwarded untouched so the wrapper does not
    #                   narrow the method's signature.
    #
    # Returns whatever the original #[] returns.
    def fetch_with_remember_column(index_or_header, *extra)
      $current_column = index_or_header
      fetch_without_remember_column(index_or_header, *extra)
    end

    # Keep the stock implementation reachable under a new name, then route
    # #[] through the recording wrapper.
    alias_method :fetch_without_remember_column, :[]
    alias_method :[], :fetch_with_remember_column
  end
end
# Set up logging: discard the log left behind by any previous run, then send
# warnings (and anything more severe) to a fresh log file.
File.delete(LOG_FILE) if File.exist?(LOG_FILE)
$log = Logger.new(LOG_FILE)
$log.level = Logger::WARN
# Write a warning to the transform log, prefixed with the current input line
# number and the column most recently read from the input row.
#
# msg - the text of the warning.
def warn(msg)
  location = format('%s:%s', $input_line_number, $current_column)
  $log.warn("#{location}: #{msg}")
end
# Return as a three-element array the number of years, months, and days
# between the two given dates, or nil if either date is missing.
#
# Time is counted from the "from" date, in the order of years, months, and
# days. This order is significant since leap years will pass unnoticed if
# they are "absorbed" by a year or month calculation. For example, 2007 to
# 2008 is a year and February 15th to March 15th is a month, whether they
# include a leap day or not. However, the leap year is observed properly if
# it occurs within the days portion of the calculation. For example
# February 20th to March 3rd may be 12 or 11 days, depending on whether it's
# a leap year.
#
# from_date - the earlier date (anything responding to year/month/day).
# to_date   - the later date (same requirements).
#
# Returns [years, months, days], or nil when either argument is nil.
def age(from_date, to_date)
  return nil if from_date.nil? || to_date.nil?

  years = to_date.year - from_date.year
  months = to_date.month - from_date.month
  # Carry a year when the "to" month falls earlier in the calendar.
  if months < 0
    years -= 1
    months += 12
  end

  days = to_date.day - from_date.day
  # Carry a month when the "to" day falls earlier in the month.
  if days < 0
    months -= 1
    if months < 0
      # Carry again.
      years -= 1
      months += 12
    end
    # Borrow the days of the month before the "to" month. When the "from"
    # day does not exist in that month (e.g. January 31st counted through
    # February), nothing of that month remains to count, so clamp at zero
    # instead of producing a negative number of days.
    previous_month_length = (Date.new(to_date.year, to_date.month, 1) - 1).day
    days = to_date.day + [previous_month_length - from_date.day, 0].max
  end

  [years, months, days]
end
# Return, as a whole number of months, the time elapsed between the two
# given dates (e.g. the subject's age when the test was administered).
#
# The years/months/days breakdown comes from `age`. When the leftover day
# count is more than 15, the result is rounded up by one month.
#
# Returns nil when `age` cannot produce a breakdown.
def chronological_time(from_date, to_date)
  years, months, days = age(from_date, to_date)
  return nil if years.nil?

  whole_months = (years * 12) + months
  # More than halfway through the month counts as a full extra month.
  days > 15 ? whole_months + 1 : whole_months
end
# Turn a date string from the csv file into a Date object.
#
# string - the raw field value; may be nil when the csv cell is empty.
#
# Returns a Date, or nil when the value is missing or cannot be parsed.
def date_from_string(string)
  # An empty csv cell arrives as nil, which Date.parse rejects with a
  # TypeError rather than an ArgumentError.
  return nil if string.nil?

  Date.parse(string, true)
rescue ArgumentError, TypeError
  nil
end
# Turn a date/time string from the csv file into a DateTime object.
#
# string - the raw field value; may be nil when the csv cell is empty.
#
# Returns a DateTime, or nil when the value is missing or cannot be parsed.
def datetime_from_string(string)
  # An empty csv cell arrives as nil, which DateTime.parse rejects with a
  # TypeError rather than an ArgumentError.
  return nil if string.nil?

  DateTime.parse(string, true)
rescue ArgumentError, TypeError
  nil
end
# Convert the given country string into a country code.
#
# The match is exact (case-sensitive). An empty string is passed through as
# an empty string. Anything else is logged as a warning and returned
# unchanged so the unexpected value stays visible in the output.
def codify_country(country)
  codes = {
    "UnitedStatesMainland" => 1,
    "PuertoRico" => 2,
    "Cuba" => 3,
    "Mexico" => 4,
    "Other" => 5,
    '' => ''
  }
  return codes[country] if codes.key?(country)

  warn "Unrecognized country \"#{country}\""
  country
end
# Codify the given state string into a state code.
#
# Matching is case-insensitive and covers the spellings found in the data
# (abbreviations, full names, and a few known mis-keyed variants). A missing
# (nil) or empty value yields ''. Anything else is logged as a warning and
# returned unchanged.
def codify_state(state)
  # to_s: empty csv cells arrive as nil, which has no #downcase.
  case state.to_s.downcase
  when "fl", "fr", "florida", "florida ." then 1
  when "nm", "new mexico" then 2
  when "pa", "pennsylvania", "lancaster,pa" then 3
  when "ma", "massachusetts" then 4
  when "ny", "new york" then 5
  when "co", "colorado" then 6
  when "tx", "texas" then 7
  when "ca", "california" then 8
  when "il", "illinois" then 9
  when "ga", "georgia" then 10
  when "nj", "ns", "new jersey" then 11
  when "ct", "connecticut" then 12
  when '' then ''
  else
    warn "Unrecognized state \"#{state}\""
    state
  end
end
# Codify how a length of stay was reported: 1 when the answer is "since
# birth", 2 when it was given as a count of weeks, months or years.
#
# Matching is case-insensitive. A missing (nil) or empty value yields ''.
# Anything else is logged as a warning and returned unchanged.
def codify_time_in_location(string)
  # to_s: empty csv cells arrive as nil, which has no #downcase.
  case string.to_s.downcase
  when "frombirth" then 1
  when "weeks", "months", "years" then 2
  when '' then ''
  else
    warn "Unrecognized duration \"#{string}\""
    string
  end
end
# Codify a yes/no answer: 1 for yes/true, 2 for no/false.
#
# Matching is case-insensitive. A missing (nil) or empty value, and the
# "not applicable" / "don't know" markers, yield ''. Anything else is logged
# as a warning and returned unchanged.
def codify_boolean(b)
  # to_s: empty csv cells arrive as nil, which has no #downcase.
  case b.to_s.downcase
  when "yes", "true" then 1
  when "no", "false" then 2
  when "", "n/a", "na", "dont_know" then ''
  else
    warn "Unrecognized boolean \"#{b}\""
    b
  end
end
# Codify the relation of a caregiver to the child. English and Spanish
# answers are both accepted.
#
# Matching is case-insensitive. A missing (nil) or empty value yields ''.
# Anything else is logged as a warning and returned unchanged.
def codify_relation(relation)
  # to_s: empty csv cells arrive as nil, which has no #downcase.
  case relation.to_s.downcase
  when "friend", "amigo", "amiga" then 1
  when "neighbor", "vecino" then 2
  when "grandmother", "abuela" then 3
  when "aunt", "tia" then 4
  when "cousin", "primo", "prima" then 5
  when "family member", "varios miembros" then 6
  when "daycare", "cuidado de ninos" then 7
  when "other" then 8
  when "" then ''
  else
    warn "Unrecognized relation \"#{relation}\""
    relation
  end
end
# Codify the relation of an extended-family member (or close friend) to the
# child.
#
# Matching is case-insensitive. A missing (nil) or empty value yields ''.
# Anything else is logged as a warning and returned unchanged.
def codify_extended_relation(relation)
  # to_s: empty csv cells arrive as nil, which has no #downcase.
  case relation.to_s.downcase
  when "grandmother maternal" then 1
  when "grandmother paternal" then 2
  when "mother's sister" then 3
  when "mother's aunt" then 4
  when "mother's cousin" then 5
  when "father's sister" then 6
  when "father's aunt" then 7
  when "father's cousin" then 8
  when "close friend" then 9
  when "other" then 10
  when "" then ''
  else
    warn "Unrecognized extended relation \"#{relation}\""
    relation
  end
end
# Codify a Spanish/English language mix on a five-point scale, from 1 (all
# Spanish) to 5 (all English).
#
# Matching is case-insensitive. A missing (nil) or empty value, and
# "dont_know", yield ''. Anything else is logged as a warning and returned
# unchanged.
def codify_language_mix(mix)
  # to_s: empty csv cells arrive as nil, which has no #downcase.
  case mix.to_s.downcase
  when "all_spanish" then 1
  when "more_spanish_than_english", "more_spanish" then 2
  when "equal_spanish_and_english", "equal" then 3
  when "more_english_than_spanish", "more_english" then 4
  when "all_english" then 5
  when "", "dont_know" then ''
  else
    warn "Unrecognized language mix \"#{mix}\""
    mix
  end
end
# Codify the name of an educational program.
#
# Matching is case-insensitive. A missing (nil) or empty value, and "na",
# yield ''. Anything else is logged as a warning and returned unchanged.
def codify_ed_program(ed)
  # to_s: empty csv cells arrive as nil, which has no #downcase.
  case ed.to_s.downcase
  when "head start", "head_start" then 1
  when "even start", "even_start" then 2
  when "vpk" then 3
  when "k2", "kindergarten_2" then 4
  when "k4", "kindergarten_4" then 5
  when "k5", "kindergarten_5" then 6
  when "other" then 7
  when "", "na" then ''
  else
    warn "Unrecognized ed program \"#{ed}\""
    ed
  end
end
# Codify how many ears are affected: 1 for one ear, 2 for both.
#
# Matching is case-insensitive. A missing (nil) or empty value yields ''.
# Anything else is logged as a warning and returned unchanged.
def codify_ears(ears)
  # to_s: empty csv cells arrive as nil, which has no #downcase.
  case ears.to_s.downcase
  when "one ear", "one_ear" then 1
  when "two ears", "two_ears", "both ears" then 2
  when "" then ''
  else
    warn "Unrecognized ears \"#{ears}\""
    ears
  end
end
# Codify which ear is affected: 1 left, 2 right, 3 both, 4 don't know.
#
# Matching is case-insensitive. A missing (nil) or empty value yields ''.
# Anything else is logged as a warning and returned unchanged.
def codify_ear(ear)
  # to_s: empty csv cells arrive as nil, which has no #downcase.
  case ear.to_s.downcase
  when "left ear", "left" then 1
  when "right ear", "right" then 2
  when "both ears", "both" then 3
  when "don't know", "dont-know" then 4
  when "" then ''
  else
    warn "Unrecognized ear \"#{ear}\""
    ear
  end
end
# Codify the highest level of schooling reached into bands: 0 for none,
# 1 for grades 1-6, 2 for grades 7-8, 3 for grades 9-11, then 4-10 for GED
# through doctorate.
#
# Matching is case-insensitive. A missing (nil) or empty value yields ''.
# Anything else is logged as a warning and returned unchanged.
def codify_schooling(ed)
  # to_s: empty csv cells arrive as nil, which has no #downcase.
  case ed.to_s.downcase
  when "0" then 0
  when "1", "2", "3", "4", "5", "6" then 1
  when "7", "8" then 2
  when "9", "10", "11" then 3
  when "ged" then 4
  when "high_school", "high school" then 5
  when "some_college", "some college" then 6
  when "associate" then 7
  when "bachelors" then 8
  when "masters" then 9
  when "phd", "md" then 10
  when "" then ''
  else
    warn "Unrecognized schooling \"#{ed}\""
    ed
  end
end
# Convert a schooling answer into a number of years of schooling.
#
# A grade from "0" to "11" maps to that same number; named attainments map
# to fixed year counts (GED / high school 12, some college 13, associate 14,
# bachelors 16, masters 18, PhD / MD 21).
#
# Matching is case-insensitive. A missing (nil) or empty value yields ''.
# Anything else is logged as a warning and returned unchanged.
def codify_school_years(ed)
  # to_s: empty csv cells arrive as nil, which has no #downcase.
  level = ed.to_s.downcase
  case level
  # Exactly "0" through "11": the grade is already the number of years.
  when /\A(?:[0-9]|1[01])\z/ then level.to_i
  when "ged", "high_school", "high school" then 12
  when "some_college", "some college" then 13
  when "associate" then 14
  when "bachelors" then 16
  when "masters" then 18
  when "phd", "md" then 21
  when "" then ''
  else
    warn "Unrecognized school years \"#{ed}\""
    ed
  end
end
# Codify which generation was the first to move: 1 the respondent ("you"),
# 2 a parent, 3 a grandparent, 4 a great-grandparent.
#
# Matching is case-insensitive. Any other value, including a missing or
# empty one, is logged as a warning and returned unchanged.
# NOTE(review): there is deliberately no silent '' branch here, so blank
# answers are reported in the log — confirm that this is intended.
def codify_first_to_move(ftm)
  # to_s: empty csv cells arrive as nil, which has no #downcase.
  case ftm.to_s.downcase
  when "you" then 1
  when "parent" then 2
  when "grandparent" then 3
  when "greatgrandparent" then 4
  else
    warn "Unrecognized first_to_move \"#{ftm}\""
    ftm
  end
end
# Codify whether the answers concern the father (1) or the stepfather (2).
#
# Matching is case-insensitive. A missing (nil) or empty value yields ''.
# Anything else is logged as a warning and returned unchanged.
def codify_stepfather(step)
  # to_s: empty csv cells arrive as nil, which has no #downcase.
  case step.to_s.downcase
  when "father" then 1
  when "stepfather" then 2
  when "" then ''
  else
    warn "Unrecognized stepfather \"#{step}\""
    step
  end
end
# Codify an ethnicity answer.
#
# Matching is case-insensitive. A missing (nil) or empty value yields ''.
# Anything else is logged as a warning and returned unchanged.
def codify_ethnicity(eth)
  # to_s: empty csv cells arrive as nil, which has no #downcase.
  case eth.to_s.downcase
  when "hispanic/latino" then 1
  when "white" then 2
  when "african american" then 3
  when "other" then 4
  when "" then ''
  else
    warn "Unrecognized ethnicity \"#{eth}\""
    eth
  end
end
# Convert a "how often do they see each other" answer into a number.
# NOTE(review): the values (364, 208, 52, 12, 6, 0) look like approximate
# days per year — confirm against the data dictionary.
#
# Matching is case-insensitive. A missing (nil) or empty value, "refused"
# and "na" yield ''. Anything else is logged as a warning and returned
# unchanged.
def codify_viewing_frequency(view)
  # to_s: empty csv cells arrive as nil, which has no #downcase.
  case view.to_s.downcase
  when "5-7 days / wk" then 364
  when "2-4 days / wk" then 208
  when "1 day / wk", "few times a month" then 52
  when "1 day / mo" then 12
  when "few times a year" then 6
  when "not involved" then 0
  when "deceased" then 0
  when "livesinhomecountry" then 0
  when "", "refused", "na" then ''
  else
    warn "Unrecognized viewing frequency \"#{view}\""
    view
  end
end
# Codify a level of involvement on a scale from 7 (5-7 days a week) down to
# 1 (lives in the home country).
# NOTE(review): monthly contact and "few times a year" both map to 4 —
# confirm that sharing a code is intended.
#
# Matching is case-insensitive. A missing (nil) or empty value yields ''.
# Anything else is logged as a warning and returned unchanged.
def codify_involvement(inv)
  # to_s: empty csv cells arrive as nil, which has no #downcase.
  case inv.to_s.downcase
  when "5-7 days / wk" then 7
  when "2-4 days / wk" then 6
  when "1 day / wk", "few times a month" then 5
  when "1 day / mo", "1 time / mo" then 4
  when "few times a year" then 4
  when "not involved" then 3
  when "deceased" then 2
  when "livesinhomecountry" then 1
  when "" then ''
  else
    warn "Unrecognized involvement \"#{inv}\""
    inv
  end
end
# Count the lines of the input file up front so that the progress bar knows
# how much work there is in total.
num_lines = 0
File.foreach(INPUT_FILE) { num_lines += 1 }
# Create the progress bar, sized to the number of input lines.
progress_bar = ProgressBar.new("Processing", num_lines)
# Loop through each line of the input file.
FasterCSV.open(OUTPUT_FILE, 'w') do |output|
# Write headers for output file.
headers = [
%w{ . language },
%w{ . filename },
%w{ . _FACILITY },
%w{ . _SUBJECT },
%w{ . _FILLED_IN },
%w{ 1 birth_date },
%w{ . chronological_age },
%w{ 2a birth_place },
%w{ 2b birth_place_city },
%w{ 2c birth_place_state },
%w{ 3 time_in_us },
%w{ . time_in_us_count },
%w{ . chronological_time_in_us },
%w{ 4 times_returned_to_home_country },
%w{ 5a time_in_home_country },
%w{ 5b time_in_home_country_count },
%w{ 6a to_one_yes },
%w{ 6b to_one_relation },
%w{ 6c to_one_relation_text },
%w{ 6d to_one_language_to_child },
%w{ 6e to_one_daycare_months },
%w{ 6f to_one_daycare_country },
%w{ 6g to_one_daycare_city },
%w{ 6h to_one_daycare_state },
%w{ 6i to_one_yes_2 },
%w{ 6j to_one_relation_2 },
%w{ 6k to_one_relation_text_2 },
%w{ 6l to_one_language_to_child_2 },
%w{ 6m to_one_daycare_months_2 },
%w{ 6n to_one_daycare_country_2 },
%w{ 6o to_one_daycare_city_2 },
%w{ 6p to_one_daycare_state_2 },
]
[ %w{ 7 two },
%w{ 8 three },
%w{ 9 four },
%w{ 10 five },
%w{ 11 six },
].each do |pair|
num, year = pair
headers.concat([
["#{num}a", "to_#{year}_yes" ],
["#{num}b", "to_#{year}_relation" ],
["#{num}c", "to_#{year}_relation_text" ],
["#{num}d", "to_#{year}_language_to_child" ],
["#{num}e", "to_#{year}_language_to_caregiver" ],
["#{num}f", "to_#{year}_daycare_months" ],
["#{num}g", "to_#{year}_daycare_country" ],
["#{num}h", "to_#{year}_daycare_city" ],
["#{num}i", "to_#{year}_daycare_state" ],
["#{num}j", "to_#{year}_yes_2" ],
["#{num}k", "to_#{year}_relation_2" ],
["#{num}l", "to_#{year}_relation_text_2" ],
["#{num}m", "to_#{year}_language_to_child_2" ],
["#{num}n", "to_#{year}_language_to_caregiver_2" ],
["#{num}o", "to_#{year}_daycare_months_2" ],
["#{num}p", "to_#{year}_daycare_country_2" ],
["#{num}q", "to_#{year}_daycare_city_2" ],
["#{num}r", "to_#{year}_daycare_state_2" ],
])
end
headers.concat([
%w{ 12a lived_with_other },
%w{ 12b lived_with_whom },
%w{ 12c lived_with_whom_other_text },
%w{ 12d to_one_daycare_country6 },
%w{ 12e live_with_time },
%w{ 12f lived_with_language_to_child },
%w{ 12g lived_with_language_to_person },
%w{ 13a early_head_start_attended },
%w{ 13b early_head_start_ages_attended_start_years },
%w{ 13c early_head_start_ages_attended_start_months },
%w{ . early_head_start_ages_attended_start },
%w{ 13d early_head_start_ages_attended_end_years },
%w{ 13e early_head_start_ages_attended_end_months },
%w{ . early_head_start_ages_attended_end },
%w{ 13f early_head_start_language_teachers_to_child },
%w{ 13g early_head_start_language_child_to_teachers },
%w{ 13h early_head_start_language_assistant_to_child },
%w{ 13i early_head_start_language_child_to_assistant },
%w{ 13j early_head_start_language_children_to_child },
%w{ 13k early_head_start_language_child_to_children },
%w{ 13l head_start_attended },
%w{ 13m head_start_ages_attended_start_years },
%w{ 13n head_start_ages_attended_start_months },
%w{ . head_start_ages_attended_start },
%w{ 13o head_start_ages_attended_end_years },
%w{ 13p head_start_ages_attended_end_months },
%w{ . head_start_ages_attended_end },
%w{ 13q head_start_language_teachers_to_child },
%w{ 13r head_start_language_child_to_teachers },
%w{ 13s head_start_language_assistant_to_child },
%w{ 13t head_start_language_child_to_assistant },
%w{ 13u head_start_language_children_to_child },
%w{ 13v head_start_language_child_to_children },
%w{ 13w even_start_attended },
%w{ 13x even_start_ages_attended_start_years },
%w{ 13y even_start_ages_attended_start_months },
%w{ . even_start_ages_attended_start },
%w{ 13z even_start_ages_attended_end_years },
%w{ 13aa even_start_ages_attended_end_months },
%w{ . even_start_ages_attended_end },
%w{ 13ab even_start_language_teachers_to_child },
%w{ 13ac even_start_language_child_to_teachers },
%w{ 13ad even_start_language_assistant_to_child },
%w{ 13ae even_start_language_child_to_assistant },
%w{ 13af even_start_language_children_to_child },
%w{ 13ag even_start_language_child_to_children },
%w{ 13ah vpk_attended },
%w{ 13ai vpk_start_ages_attended_start_years },
%w{ 13aj vpk_start_ages_attended_start_months },
%w{ . vpk_start_ages_attended_start },
%w{ 13ak vpk_start_ages_attended_end_years },
%w{ 13al vpk_start_ages_attended_end_months },
%w{ . vpk_start_ages_attended_end },
%w{ 13am vpk_language_teachers_to_child },
%w{ 13an vpk_language_child_to_teachers },
%w{ 13ao vpk_language_assistant_to_child },
%w{ 13ap vpk_language_child_to_assistant },
%w{ 13aq vpk_language_children_to_child },
%w{ 13ar vpk_language_child_to_children },
%w{ 13as other_ed_attended },
%w{ 13at other_program_name },
%w{ 13av other_start_ages_attended_start_years },
%w{ 13ax other_start_ages_attended_start_months },
%w{ . other_start_ages_attended_start },
%w{ 13ay other_start_ages_attended_end_years },
%w{ 13az other_start_ages_attended_end_months },
%w{ . other_start_ages_attended_end },
%w{ 13ba other_ed_language_teachers_to_child },
%w{ 13bb other_ed_language_child_to_teachers },
%w{ 13bc other_ed_language_assistant_to_child },
%w{ 13bd other_ed_language_child_to_assistant },
%w{ 13be other_ed_language_children_to_child },
%w{ 13bf other_ed_language_child_to_children },
%w{ 14a current_ed_program_1 },
%w{ 14b current_ed_program_other_text1 },
%w{ 14c current_ed_program_description_1 },
%w{ 14d to_one_daycare_country4 },
%w{ 14e current_ed_program_age_begun_1 },
%w{ 14g current_ed_program_day_per_week_1 },
%w{ 14h current_ed_program_hours_per_day_1 },
%w{ 14i current_ed_program_language_teachers_to_child_1 },
%w{ 14j current_ed_program_language_child_to_teachers_1 },
%w{ 14k current_ed_program_language_assistant_to_child_1 },
%w{ 14l current_ed_program_language_child_to_assistant_1 },
%w{ 14m current_ed_program_language_children_to_child_1 },
%w{ 14n current_ed_program_language_child_to_children_1 },
%w{ 14o current_ed_program_2 },
%w{ 14p current_ed_program_other_text2 },
%w{ 14q current_ed_program_description_2 },
%w{ 14r to_one_daycare_country5 },
%w{ 14s current_ed_program_age_begun_2 },
%w{ 14u current_ed_program_day_per_week_2 },
%w{ 14v current_ed_program_hours_per_day_2 },
%w{ 14w current_ed_program_language_teachers_to_child_2 },
%w{ 14x current_ed_program_language_child_to_teachers_2 },
%w{ 14y current_ed_program_language_assistant_to_child_2 },
%w{ 14z current_ed_program_language_child_to_assistant_2 },
%w{ 14aa current_ed_program_language_children_to_child_2 },
%w{ 14ab current_ed_program_language_child_to_children_2 },
%w{ 14ac current_ed_program_3 },
%w{ 14ad current_ed_program_other_text3 },
%w{ 14ae current_ed_program_description_3 },
%w{ 14af to_one_daycare_country3 },
%w{ 14ag current_ed_program_age_begun_3 },
%w{ 14ai current_ed_program_day_per_week_3 },
%w{ 14aj current_ed_program_hours_per_day_3 },
%w{ 14ak current_ed_program_language_teachers_to_child_3 },
%w{ 14al current_ed_program_language_child_to_teachers_3 },
%w{ 14am current_ed_program_language_assistant_to_child_3 },
%w{ 14an current_ed_program_language_child_to_assistant_3 },
%w{ 14ao current_ed_program_language_children_to_child_3 },
%w{ 14ap current_ed_program_language_child_to_children_3 },
%w{ 15a has_trouble_hearing },
%w{ 15b has_trouble_hearing_always },
%w{ 15c has_trouble_hearing_infection },
%w{ 15d has_trouble_hearing_noisy },
%w{ 15e has_trouble_hearing_ears },
%w{ 16a ear_infection },
%w{ 16b ear_infection_ear },
%w{ 16c ear_infection_number },
%w{ 16d ear_infection_under_1_year },
%w{ 16e ear_infection_1_2 },
%w{ 16f ear_infection_2_3 },
%w{ 16g ear_infection_3_4 },
%w{ 16h ear_infection_4_5 },
%w{ 16i ear_infection_5_6 },
%w{ 16j ear_infection_has_had_tubes },
%w{ 17 difficulty_understanding },
%w{ 18a difficulty_understanding_others },
%w{ 18b difficulty_understanding_father },
%w{ 18c difficulty_understanding_grandfather },
%w{ 18d difficulty_understanding_grandmother },
%w{ 18e difficulty_understanding_brother },
%w{ 18f difficulty_understanding_sister },
%w{ 18g difficulty_understanding_teacher },
%w{ 18h difficulty_understanding_relative },
%w{ 18i difficulty_understanding_relative_text },
%w{ 18j difficulty_understanding_other },
%w{ 18k difficulty_understanding_other_text },
%w{ 19a think_speech_problem },
%w{ 19b think_speech_problem_text2 },
%w{ 19c think_speech_problem_age_problem },
%w{ 19d think_speech_problem_aware },
%w{ 19e think_speech_problem_swaps_sounds },
%w{ 20a think_language_problem },
%w{ 20b think_speech_problem_text },
%w{ 20c think_language_problem_age },
%w{ 21a has_received_therapy },
%w{ 21b has_received_therapy_time },
%w{ 21c has_received_therapy_time_count },
%w{ 21d has_received_therapy_agency },
%w{ 22 parent_birth_date },
%w{ . parent_age_months },
%w{ 23a parent_birth_place },
%w{ 23b parent_birth_place_city },
%w{ 23c parent_birth_place_stateprovince },
%w{ 24a parent_time_in_us },
%w{ 24b parent_time_in_us_count },
%w{ . parent_time_in_us_months },
%w{ 25 parent_times_returned_to_home_country },
%w{ 26a parent_time_in_home_country },
%w{ 26b parent_time_in_home_country_count },
%w{ . parent_time_in_home_country_months },
%w{ 27a parent_work_outside_home },
%w{ 27b parent_job_title },
%w{ 27c parent_job_responsibilities },
%w{ 27d parent_job_hours },
%w{ 28a parent_schooling },
%w{ . parent_years_of_schooling },
%w{ 28b parent_schooling_name },
%w{ 28c parent_schooling_units },
%w{ 28d parent_schooling_time },
%w{ 29 parent_first_to_move },
%w{ . father_or_stepfather },
%w{ 30a father_birth_date_options },
%w{ 30b father_birth_date },
%w{ . father_age },
%w{ 31a father_ethnicity },
%w{ 31b father_ethnicity_other_text },
%w{ 32a father_birth_place_na },
%w{ 32b father_birth_place },
%w{ 32c father_birth_place_city },
%w{ 32d father_birth_place_state },
%w{ 33a father_time_in_us_na },
%w{ 33b father_time_in_us },
%w{ 33c father_time_in_us_count },
%w{ . father_time_in_us_months },
%w{ 34a father_job_outside_home },
%w{ 34b father_job_title },
%w{ 34c father_job_responsibilities },
%w{ 34d father_job_hours },
%w{ 35a father_schooling },
%w{ . father_schooling_name },
%w{ 35b father_schooling_units },
%w{ 35c father_schooling_time },
%w{ 36 father_lives_with_parent },
%w{ 37a father_viewing_frequency },
%w{ . father_involvement },
%w{ 38a parent_speaks_english },
%w{ 38b parent_speaks_spanish },
%w{ 38c parent_speaks_other },
%w{ 38d parent_speaks_spanish_puertorican },
%w{ 38e parent_speaks_spanish_mexican },
%w{ 38f parent_speaks_spanish_cuban },
%w{ 38g parent_speaks_spanish_other },
%w{ 38h parent_speaks_spanish_other_text },
])
output << headers.map { |h| h[0] }
output << headers.map { |h| h[1] }
# Process each line of the input file.
$input_line_number = 0
FasterCSV.foreach(INPUT_FILE, :headers => true, :return_headers => true) do |input|
$input_line_number += 1
# Increment the progress bar.
progress_bar.inc
# Just skip the header row.
next if input.header_row?
# The array where we're storing the current row of output values.
row = []
row << input['language']
row << input['filename']
row << input['_FACILITY']
row << input['_SUBJECT']
row << datetime_from_string(input['_FILLED_IN'])
row << date_from_string(input['birth_date'])
# Calculate chronological age.
chronological_age = chronological_time(date_from_string(input['birth_date']), date_from_string(input['_FILLED_IN']))
row << chronological_age
# Codify birth place.
row << codify_country(input['birth_place'])
row << input['birth_place_city']
# Codify birth place state.
row << codify_state(input['birth_place_state'])
row << codify_time_in_location(input['time_in_us'])
row << input['time_in_us_count']
if codify_time_in_location(input['time_in_us']) == 1 # from birth
chronological_time_in_us = chronological_age
else
chronological_time_in_us = input['time_in_us_count']
end
row << chronological_time_in_us
row << input['times_returned_to_home_country']
# TODO: Clarify the amount of time spent in home country.
row << input['time_in_home_country']
row << input['time_in_home_country_count']
row << codify_boolean(input['to_one_yes'])
row << codify_relation(input['to_one_relation'])
row << input['to_one_relation_text']
row << codify_language_mix(input['to_one_language_to_child'])
row << input['to_one_daycare_months']
row << codify_country(input['to_one_daycare_country'])
row << input['to_one_daycare_city']
row << codify_state(input['to_one_daycare_state'])
row << codify_boolean(input['to_one_yes_2'])
row << codify_relation(input['to_one_relation_2'])
row << input['to_one_relation_text_2']
row << codify_language_mix(input['to_one_language_to_child_2'])
row << input['to_one_daycare_months_2']
row << codify_country(input['to_one_daycare_country_2'])
row << input['to_one_daycare_city_2']
row << codify_state(input['to_one_daycare_state_2'])
# The below have the extra "to_'year'_language_to_caregiver", which is
# why the above is not included.
%w{two three four five six}.each do |year|
row << codify_boolean(input["to_#{year}_yes"])
row << codify_relation(input["to_#{year}_relation"])
row << input["to_#{year}_relation_text"]
row << codify_language_mix(input["to_#{year}_language_to_child"])
row << codify_language_mix(input["to_#{year}_language_to_caregiver"])
row << input["to_#{year}_daycare_months"]
row << codify_country(input["to_#{year}_daycare_country"])
row << input["to_#{year}_daycare_city"]
row << codify_state(input["to_#{year}_daycare_state"])
row << codify_boolean(input["to_#{year}_yes_2"])
row << codify_relation(input["to_#{year}_relation_2"])
row << input["to_#{year}_relation_text_2"]
row << codify_language_mix(input["to_#{year}_language_to_child_2"])
row << codify_language_mix(input["to_#{year}_language_to_caregiver_2"])
row << input["to_#{year}_daycare_months_2"]
row << codify_country(input["to_#{year}_daycare_country_2"])
row << input["to_#{year}_daycare_city_2"]
row << codify_state(input["to_#{year}_daycare_state_2"])
end
row << codify_boolean(input["lived_with_other"])
row << codify_extended_relation(input["lived_with_whom"])
row << input["lived_with_whom_other_text"]
row << codify_country(input["to_one_daycare_country6"])
row << input["live_with_time"]
row << codify_language_mix(input["lived_with_language_to_child"])
row << codify_language_mix(input["lived_with_language_to_person"])
row << codify_boolean(input["early_head_start_attended"])
row << input["early_head_start_ages_attended_start_years"]
row << input["early_head_start_ages_attended_start_months"]
row << (input["early_head_start_ages_attended_start_years"].to_i * 12) + input["early_head_start_ages_attended_start_months"].to_i
row << input["early_head_start_ages_attended_end_years"]
row << input["early_head_start_ages_attended_end_months"]
row << (input["early_head_start_ages_attended_end_years"].to_i * 12) + input["early_head_start_ages_attended_end_months"].to_i
row << codify_language_mix(input["early_head_start_language_teachers_to_child"])
row << codify_language_mix(input["early_head_start_language_child_to_teachers"])
row << codify_language_mix(input["early_head_start_language_assistant_to_child"])
row << codify_language_mix(input["early_head_start_language_child_to_assistant"])
row << codify_language_mix(input["early_head_start_language_children_to_child"])
row << codify_language_mix(input["early_head_start_language_child_to_children"])
row << codify_boolean(input["head_start_attended"])
row << input["head_start_ages_attended_start_years"]
row << input["head_start_ages_attended_start_months"]
row << (input["head_start_ages_attended_start_years"].to_i * 12) + input["head_start_ages_attended_start_months"].to_i
row << input["head_start_ages_attended_end_years"]
row << input["head_start_ages_attended_end_months"]
row << (input["head_start_ages_attended_end_years"].to_i * 12) + input["head_start_ages_attended_end_months"].to_i
row << codify_language_mix(input["head_start_language_teachers_to_child"])
row << codify_language_mix(input["head_start_language_child_to_teachers"])
row << codify_language_mix(input["head_start_language_assistant_to_child"])
row << codify_language_mix(input["head_start_language_child_to_assistant"])
row << codify_language_mix(input["head_start_language_children_to_child"])
row << codify_language_mix(input["head_start_language_child_to_children"])
row << codify_boolean(input["even_start_attended"])
row << input["even_start_ages_attended_start_years"]
row << input["even_start_ages_attended_start_months"]
row << (input["even_start_ages_attended_start_years"].to_i * 12) + input["even_start_ages_attended_start_months"].to_i
row << input["even_start_ages_attended_end_years"]
row << input["even_start_ages_attended_end_months"]
row << (input["even_start_ages_attended_end_years"].to_i * 12) + input["even_start_ages_attended_end_months"].to_i
row << codify_language_mix(input["even_start_language_teachers_to_child"])
row << codify_language_mix(input["even_start_language_child_to_teachers"])
row << codify_language_mix(input["even_start_language_assistant_to_child"])
row << codify_language_mix(input["even_start_language_child_to_assistant"])
row << codify_language_mix(input["even_start_language_children_to_child"])
row << codify_language_mix(input["even_start_language_child_to_children"])
row << codify_boolean(input["vpk_attended"])
row << input["vpk_start_ages_attended_start_years"]
row << input["vpk_start_ages_attended_start_months"]
row << (input["vpk_start_ages_attended_start_years"].to_i * 12) + input["vpk_start_ages_attended_start_months"].to_i
row << input["vpk_start_ages_attended_end_years"]
row << input["vpk_start_ages_attended_end_months"]
row << (input["vpk_start_ages_attended_end_years"].to_i * 12) + input["vpk_start_ages_attended_end_months"].to_i
row << codify_language_mix(input["vpk_language_teachers_to_child"])
row << codify_language_mix(input["vpk_language_child_to_teachers"])
row << codify_language_mix(input["vpk_language_assistant_to_child"])
row << codify_language_mix(input["vpk_language_child_to_assistant"])
row << codify_language_mix(input["vpk_language_children_to_child"])
row << codify_language_mix(input["vpk_language_child_to_children"])
row << codify_boolean(input["other_ed_attended"])
row << input["other_program_name"]
row << input["other_start_ages_attended_start_years"]
row << input["other_start_ages_attended_start_months"]
# Total start age in months, built from the "other" program's own years and
# months columns (not the VPK month column).
row << (input["other_start_ages_attended_start_years"].to_i * 12) + input["other_start_ages_attended_start_months"].to_i
row << input["other_start_ages_attended_end_years"]
row << input["other_start_ages_attended_end_months"]
# Total end age in months, likewise from the "other" program's own columns.
row << (input["other_start_ages_attended_end_years"].to_i * 12) + input["other_start_ages_attended_end_months"].to_i
row << codify_language_mix(input["other_ed_language_teachers_to_child"])
row << codify_language_mix(input["other_ed_language_child_to_teachers"])
row << codify_language_mix(input["other_ed_language_assistant_to_child"])
row << codify_language_mix(input["other_ed_language_child_to_assistant"])
row << codify_language_mix(input["other_ed_language_children_to_child"])
row << codify_language_mix(input["other_ed_language_child_to_children"])
row << codify_ed_program(input["current_ed_program_1"])
row << input["current_ed_program_other_text1"]
row << input["current_ed_program_description_1"]
row << codify_country(input["to_one_daycare_country4"])
row << (input["current_ed_program_age_begun_years_1"].to_i * 12) + input["current_ed_program_age_begun_months_1"].to_i
row << input["current_ed_program_day_per_week_1"]
row << input["current_ed_program_hours_per_day_1"]
row << codify_language_mix(input["current_ed_program_language_teachers_to_child_1"])
row << codify_language_mix(input["current_ed_program_language_child_to_teachers_1"])
row << codify_language_mix(input["current_ed_program_language_assistant_to_child_1"])
row << codify_language_mix(input["current_ed_program_language_child_to_assistant_1"])
row << codify_language_mix(input["current_ed_program_language_children_to_child_1"])
row << codify_language_mix(input["current_ed_program_language_child_to_children_1"])
row << codify_ed_program(input["current_ed_program_2"])
row << input["current_ed_program_other_text2"]
row << input["current_ed_program_description_2"]
row << codify_country(input["to_one_daycare_country5"])
row << (input["current_ed_program_age_begun_years_2"].to_i * 12) + input["current_ed_program_age_begun_months_2"].to_i
row << input["current_ed_program_day_per_week_2"]
row << input["current_ed_program_hours_per_day_2"]
row << codify_language_mix(input["current_ed_program_language_teachers_to_child_2"])
row << codify_language_mix(input["current_ed_program_language_child_to_teachers_2"])
row << codify_language_mix(input["current_ed_program_language_assistant_to_child_2"])
row << codify_language_mix(input["current_ed_program_language_child_to_assistant_2"])
row << codify_language_mix(input["current_ed_program_language_children_to_child_2"])
row << codify_language_mix(input["current_ed_program_language_child_to_children_2"])
row << codify_ed_program(input["current_ed_program_3"])
row << input["current_ed_program_other_text3"]
row << input["current_ed_program_description_3"]
row << codify_country(input["to_one_daycare_country3"])
row << (input["current_ed_program_age_begun_years_3"].to_i * 12) + input["current_ed_program_age_begun_months_3"].to_i
row << input["current_ed_program_day_per_week_3"]
row << input["current_ed_program_hours_per_day_3"]
row << codify_language_mix(input["current_ed_program_language_teachers_to_child_3"])
row << codify_language_mix(input["current_ed_program_language_child_to_teachers_3"])
row << codify_language_mix(input["current_ed_program_language_assistant_to_child_3"])
row << codify_language_mix(input["current_ed_program_language_child_to_assistant_3"])
row << codify_language_mix(input["current_ed_program_language_children_to_child_3"])
row << codify_language_mix(input["current_ed_program_language_child_to_children_3"])
row << codify_boolean(input["has_trouble_hearing"])
row << codify_boolean(input["has_trouble_hearing_always"])
row << codify_boolean(input["has_trouble_hearing_infection"])
row << codify_boolean(input["has_trouble_hearing_noisy"])
row << codify_ears(input["has_trouble_hearing_ears"])
row << codify_boolean(input["ear_infection"])
row << codify_ear(input["ear_infection_ear"])
row << input["ear_infection_number"]
row << codify_boolean(input["ear_infection_under_1_year"])
row << codify_boolean(input["ear_infection_1_2"])
row << codify_boolean(input["ear_infection_2_3"])
row << codify_boolean(input["ear_infection_3_4"])
row << codify_boolean(input["ear_infection_4_5"])
row << codify_boolean(input["ear_infection_5_6"])
row << codify_boolean(input["ear_infection_has_had_tubes"])
# "Difficulty understanding" answers: the overall flag followed by one flag
# per listener, each passed through codify_boolean.
%w[
  difficulty_understanding
  difficulty_understanding_others
  difficulty_understanding_father
  difficulty_understanding_grandfather
  difficulty_understanding_grandmother
  difficulty_understanding_brother
  difficulty_understanding_sister
  difficulty_understanding_teacher
  difficulty_understanding_relative
].each do |column_name|
  row << codify_boolean(input[column_name])
end

# Free-text detail for "relative", then the "other" flag and its free text.
row << input["difficulty_understanding_relative_text"]
row << codify_boolean(input["difficulty_understanding_other"])
row << input["difficulty_understanding_other_text"]
# Suspected speech problem: the flag goes through codify_boolean, the
# follow-up answers are copied through unchanged.
row << codify_boolean(input["think_speech_problem"])
%w[
  think_speech_problem_text2
  think_speech_problem_age_problem
  think_speech_problem_aware
  think_speech_problem_swaps_sounds
].each do |column_name|
  row << input[column_name]
end

# Suspected language problem.
# NOTE(review): the text column read here is "think_speech_problem_text",
# not a "think_language_problem_*" column -- confirm against the input
# headers that this is the intended source.
row << codify_boolean(input["think_language_problem"])
row << input["think_speech_problem_text"]
row << input["think_language_problem_age"]

# Therapy history: flag, then the raw duration unit, count, and agency.
row << codify_boolean(input["has_received_therapy"])
%w[
  has_received_therapy_time
  has_received_therapy_time_count
  has_received_therapy_agency
].each do |column_name|
  row << input[column_name]
end
# Parent's age on the day the form was filled in, computed from the birth
# date and the _FILLED_IN date. The result is kept in chronological_age
# because the "frombirth" duration branches further down reuse it.
parent_born_on = date_from_string(input['parent_birth_date'])
form_filled_on = date_from_string(input['_FILLED_IN'])
chronological_age = chronological_time(parent_born_on, form_filled_on)

# Raw birth date first, then the derived age.
row << input['parent_birth_date']
row << chronological_age

# Birth place: coded country, raw city, coded state/province.
row << codify_country(input['parent_birth_place'])
row << input['parent_birth_place_city']
row << codify_state(input['parent_birth_place_stateprovince'])
# Parent's time in the US: the unit and count exactly as entered, followed by
# one derived column. The derived value looks like a month count (years * 12,
# months as-is, weeks / 4); "frombirth" reuses chronological_age, whose unit
# is whatever chronological_time returns -- TODO confirm it matches.
# NOTE(review): unlike parent_time_in_home_country, no "days" unit is handled
# here, so "days" falls through to the warning branch -- confirm intended.
row << input['parent_time_in_us']
row << input['parent_time_in_us_count']

# to_s guards against a nil field (an empty, unquoted CSV cell parses as nil),
# which previously raised NoMethodError on downcase. A nil unit is now treated
# like an empty string.
case input['parent_time_in_us'].to_s.downcase
when "frombirth"
  row << chronological_age
when "years"
  row << (input['parent_time_in_us_count'].to_i * 12)
when "months"
  row << input['parent_time_in_us_count']
when "weeks"
  # Integer division: partial groups of four weeks are dropped.
  row << (input['parent_time_in_us_count'].to_i / 4)
when ""
  row << ""
else
  # The unit column is re-read inside the message so that it is the most
  # recently accessed column when the warning is logged.
  warn "Unrecognized parent_time_in_us \"#{input['parent_time_in_us']}\""
  row << input['parent_time_in_us_count']
end
# Parent's visits to and time spent in the home country: three raw columns,
# followed by one derived column. The derived value looks like a month count
# (years * 12, months as-is, weeks / 4, days / 30); "frombirth" reuses
# chronological_age, whose unit is whatever chronological_time returns --
# TODO confirm it matches.
row << input['parent_times_returned_to_home_country']
row << input['parent_time_in_home_country']
row << input['parent_time_in_home_country_count']

# to_s guards against a nil field (an empty, unquoted CSV cell parses as nil),
# which previously raised NoMethodError on downcase. A nil unit is now treated
# like an empty string.
case input['parent_time_in_home_country'].to_s.downcase
when "frombirth"
  row << chronological_age
when "years"
  row << (input['parent_time_in_home_country_count'].to_i * 12)
when "months"
  row << input['parent_time_in_home_country_count']
when "weeks"
  # Integer division: partial groups of four weeks are dropped.
  row << (input['parent_time_in_home_country_count'].to_i / 4)
when "days"
  # Integer division: anything under 30 days becomes 0.
  row << (input['parent_time_in_home_country_count'].to_i / 30)
when ""
  row << ""
else
  # The unit column is re-read inside the message so that it is the most
  # recently accessed column when the warning is logged.
  warn "Unrecognized parent_time_in_home_country \"#{input['parent_time_in_home_country']}\""
  row << input['parent_time_in_home_country_count']
end
# Parent's employment: the works-outside-home flag, then the raw job fields.
row << codify_boolean(input['parent_work_outside_home'])
%w[
  parent_job_title
  parent_job_responsibilities
  parent_job_hours
].each do |column_name|
  row << input[column_name]
end

# Parent's schooling: the same answer is coded twice, once by
# codify_schooling and once by codify_school_years.
row << codify_schooling(input['parent_schooling'])
row << codify_school_years(input['parent_schooling'])
%w[
  parent_schooling_name
  parent_schooling_units
  parent_schooling_time
].each do |column_name|
  row << input[column_name]
end

row << codify_first_to_move(input['parent_first_to_move'])
# Father / stepfather indicator and the raw birth-date answers.
row << codify_stepfather(input['father_or_stepfather'])
row << input['father_birth_date_options']
row << input['father_birth_date']

# Father's age on the day the form was filled in. The result is kept in
# father_chronological_age because the father_time_in_us "frombirth" branch
# further down reuses it.
father_born_on = date_from_string(input['father_birth_date'])
form_filled_on_for_father = date_from_string(input['_FILLED_IN'])
father_chronological_age = chronological_time(father_born_on, form_filled_on_for_father)
row << father_chronological_age

# Ethnicity: coded value plus the free-text "other" answer.
row << codify_ethnicity(input['father_ethnicity'])
row << input['father_ethnicity_other_text']

# Birth place.
# NOTE(review): father_birth_place_na is copied through raw, whereas
# father_time_in_us_na is passed through codify_boolean -- confirm intended.
row << input['father_birth_place_na']
row << codify_country(input['father_birth_place'])
row << input['father_birth_place_city']
row << codify_state(input['father_birth_place_state'])
# Father's time in the US: the N/A flag, the unit and count as entered, then
# one derived column. The derived value looks like a year count (years as-is,
# months / 12, weeks / 52), unlike the parent columns, which use a month
# scale. "frombirth" reuses father_chronological_age, whose unit is whatever
# chronological_time returns -- TODO confirm it matches.
row << codify_boolean(input['father_time_in_us_na'])
row << input['father_time_in_us']
row << input['father_time_in_us_count']

# to_s guards against a nil field (an empty, unquoted CSV cell parses as nil),
# which previously raised NoMethodError on downcase. A nil unit is now treated
# like an empty string.
case input['father_time_in_us'].to_s.downcase
when "frombirth"
  row << father_chronological_age
when "years"
  row << input['father_time_in_us_count']
when "months"
  # Integer division: anything under 12 months becomes 0.
  row << (input['father_time_in_us_count'].to_i / 12)
when "weeks"
  # Integer division: anything under 52 weeks becomes 0.
  row << (input['father_time_in_us_count'].to_i / 52)
when "", "doesnotliveinus", "unknown"
  # No usable duration: emit an empty cell without logging a warning.
  row << ""
else
  # The unit column is re-read inside the message so that it is the most
  # recently accessed column when the warning is logged.
  warn "Unrecognized father_time_in_us \"#{input['father_time_in_us']}\""
  row << input['father_time_in_us_count']
end
# Father's employment: the works-outside-home flag, then the raw job fields.
row << codify_boolean(input['father_job_outside_home'])
%w[
  father_job_title
  father_job_responsibilities
  father_job_hours
].each do |column_name|
  row << input[column_name]
end

# Father's schooling. Unlike the parent columns, the codify_school_years
# value is emitted after the raw name and units columns.
row << codify_schooling(input['father_schooling'])
row << input['father_schooling_name']
row << input['father_schooling_units']
row << codify_school_years(input['father_schooling'])

# Living arrangement and contact. The viewing-frequency answer feeds two
# columns: one via codify_viewing_frequency and one via codify_involvement.
row << codify_boolean(input['father_lives_with_parent'])
row << codify_viewing_frequency(input['father_viewing_frequency'])
row << codify_involvement(input['father_viewing_frequency'])
# Languages the parent speaks: one codify_boolean column per language or
# Spanish variety, in output order.
%w[
  parent_speaks_english
  parent_speaks_spanish
  parent_speaks_other
  parent_speaks_spanish_puertorican
  parent_speaks_spanish_mexican
  parent_speaks_spanish_cuban
  parent_speaks_spanish_other
].each do |column_name|
  row << codify_boolean(input[column_name])
end

# Free-text description for the "other" Spanish variety.
row << input['parent_speaks_spanish_other_text']

# The row is complete; append it to the output.
output << row
end
end
progress_bar.finish
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment