tpapp · February 7, 2017 21:12
diff --git a/code_20170207.jl b/code_20170207.jl
 using AMDB
 using DataStructures
 using IntervalSets
 using Plots
 using DataFrames                # you may need to add these libraries
 using StatPlots                 # with Pkg.add

 # Expand `~` in the path, then read the AMDB subsample with `deserialize_gz`.
 records = "~/research/AMDB/data/AMDB_subsample.jls.gz" |> expanduser |> deserialize_gz
 # Select the Plotly backend so plots open in the browser; other backends work too.
 plotly()

 ################################################################################

 """
 Return a counter for total spell durations by type in `interval`, from
 data `records`.
 """
 function spell_durations_in_year(records, interval)
     # Counter with keys of type `AMP.Spell`; each key accumulates a running total.
     totals = counter(AMP.Spell)
     # `records` yields pairs; only the second element (the data) is used here.
     for (_, data) in records, spell in data.AMP_spells
         # Part of the spell that falls inside `interval`.
         overlap = intersect(spell.interval, interval)
         # Add the width of the overlap, as an `Int`, to the tally for this status.
         push!(totals, spell.status, convert(Int, IntervalSets.width(overlap)))
     end
     return totals
 end

 """
 Normalized total durations by year, using `spell_durations_in_year`.
 """
 function year_shares(records, year)
     # Closed interval from 1 January to 31 December of `year`.
     whole_year = Date(year, 1, 1)..Date(year, 12, 31)
     totals = spell_durations_in_year(records, whole_year)
     # NOTE(review): presumably `normalize(_, 1)` rescales by the 1-norm so the
     # totals become shares; confirm against the `normalize` method for counters.
     return normalize(totals, 1)
 end

 """
     extract_spell_statistics(year_statistics, spelltype)

 Collect the value stored under `spelltype` in each element of `year_statistics`,
 using `0` for elements that have no entry for that key.
 """
 function extract_spell_statistics(year_statistics, spelltype)
     return collect(get(stats, spelltype, 0) for stats in year_statistics)
 end

 # Years covered by the analysis.
 all_years = 1972:2007
 # One set of normalized shares per year, in the same order as `all_years`.
 year_stats = map(year -> year_shares(records, year), all_years)

 # Sanity check: plot one spell type's share across all years.
 plot(all_years, extract_spell_statistics(year_stats, AMP.other_insured_time))

 # Flatten the per-year statistics into long format: one row per (year, spell type).
 # NOTE(review): the columns are deliberately left as `Any[]`, as in the original.
 year_df = let year_col = Any[], type_col = Any[], ratio_col = Any[]
     for (year, stats) in zip(all_years, year_stats), (spelltype, ratio) in stats
         push!(year_col, year)
         push!(type_col, spelltype)
         push!(ratio_col, ratio)
     end
     DataFrame(year = year_col, spelltype = type_col, ratio = ratio_col)
 end

 # All spell types in a single figure; too crowded to read comfortably.
 plot(year_df, :year, :ratio, group = :spelltype)

 # To simplify the picture, order the spell types by their mean ratio.

 # Per-spell-type means of the other columns, sorted by the mean ratio.
 ratio_avg = aggregate(year_df, :spelltype, mean) |>
     (means -> sort(means, cols = :ratio_mean))

 """
     pick_spelltypes(data, spelltypes)

 Return the rows of `data` whose `:spelltype` entry is contained in `spelltypes`.
 """
 function pick_spelltypes(data, spelltypes)
     # One Boolean per row: is that row's spell type among the requested ones?
     rowmask = collect(in(kind, spelltypes) for kind in data[:spelltype])
     return data[rowmask, :]
 end

 """
     picked_plot(data, spelltypes)

 Plot `:ratio` against `:year`, grouped by `:spelltype`, using only the rows of
 `data` selected by `pick_spelltypes(data, spelltypes)`.
 """
 function picked_plot(data, spelltypes)
     subset = pick_spelltypes(data, spelltypes)
     return plot(subset, :year, :ratio, group = :spelltype)
 end

 # Split the sorted spell types into four groups and draw one panel per group.
 # NOTE(review): the fixed row ranges assume `ratio_avg` has at least 18 rows.
 p1, p2, p3, p4 = map((1:6, 7:12, 13:18, 19:size(ratio_avg, 1))) do rows
     picked_plot(year_df, ratio_avg[rows, :spelltype])
 end

 # Combine the four panels into one figure.
 plot(p1, p2, p3, p4)
	using AMDB
	using DataStructures
	using IntervalSets
	using Plots
	using DataFrames # you may need to add these libraries
	using StatPlots # with Pkg.add

	# Expand `~` in the path, then read the AMDB subsample with `deserialize_gz`.
	records = "~/research/AMDB/data/AMDB_subsample.jls.gz" |> expanduser |> deserialize_gz
	# Select the Plotly backend so plots open in the browser; other backends work too.
	plotly()

	################################################################################

	"""
	Return a counter for total spell durations by type in `interval`, from
	data `records`.
	"""
	function spell_durations_in_year(records, interval)
	    # Counter with keys of type `AMP.Spell`; each key accumulates a running total.
	    totals = counter(AMP.Spell)
	    # `records` yields pairs; only the second element (the data) is used here.
	    for (_, data) in records, spell in data.AMP_spells
	        # Part of the spell that falls inside `interval`.
	        overlap = intersect(spell.interval, interval)
	        # Add the width of the overlap, as an `Int`, to the tally for this status.
	        push!(totals, spell.status, convert(Int, IntervalSets.width(overlap)))
	    end
	    return totals
	end

	"""
	Normalized total durations by year, using `spell_durations_in_year`.
	"""
	function year_shares(records, year)
	    # Closed interval from 1 January to 31 December of `year`.
	    whole_year = Date(year, 1, 1)..Date(year, 12, 31)
	    totals = spell_durations_in_year(records, whole_year)
	    # NOTE(review): presumably `normalize(_, 1)` rescales by the 1-norm so the
	    # totals become shares; confirm against the `normalize` method for counters.
	    return normalize(totals, 1)
	end

	"""
	    extract_spell_statistics(year_statistics, spelltype)

	Collect the value stored under `spelltype` in each element of `year_statistics`,
	using `0` for elements that have no entry for that key.
	"""
	function extract_spell_statistics(year_statistics, spelltype)
	    return collect(get(stats, spelltype, 0) for stats in year_statistics)
	end

	# Years covered by the analysis.
	all_years = 1972:2007
	# One set of normalized shares per year, in the same order as `all_years`.
	year_stats = map(year -> year_shares(records, year), all_years)

	# Sanity check: plot one spell type's share across all years.
	plot(all_years, extract_spell_statistics(year_stats, AMP.other_insured_time))

	# Flatten the per-year statistics into long format: one row per (year, spell type).
	# NOTE(review): the columns are deliberately left as `Any[]`, as in the original.
	year_df = let year_col = Any[], type_col = Any[], ratio_col = Any[]
	    for (year, stats) in zip(all_years, year_stats), (spelltype, ratio) in stats
	        push!(year_col, year)
	        push!(type_col, spelltype)
	        push!(ratio_col, ratio)
	    end
	    DataFrame(year = year_col, spelltype = type_col, ratio = ratio_col)
	end

	# All spell types in a single figure; too crowded to read comfortably.
	plot(year_df, :year, :ratio, group = :spelltype)

	# To simplify the picture, order the spell types by their mean ratio.

	# Per-spell-type means of the other columns, sorted by the mean ratio.
	ratio_avg = aggregate(year_df, :spelltype, mean) |>
	    (means -> sort(means, cols = :ratio_mean))

	"""
	    pick_spelltypes(data, spelltypes)

	Return the rows of `data` whose `:spelltype` entry is contained in `spelltypes`.
	"""
	function pick_spelltypes(data, spelltypes)
	    # One Boolean per row: is that row's spell type among the requested ones?
	    rowmask = collect(in(kind, spelltypes) for kind in data[:spelltype])
	    return data[rowmask, :]
	end

	"""
	    picked_plot(data, spelltypes)

	Plot `:ratio` against `:year`, grouped by `:spelltype`, using only the rows of
	`data` selected by `pick_spelltypes(data, spelltypes)`.
	"""
	function picked_plot(data, spelltypes)
	    subset = pick_spelltypes(data, spelltypes)
	    return plot(subset, :year, :ratio, group = :spelltype)
	end

	# Split the sorted spell types into four groups and draw one panel per group.
	# NOTE(review): the fixed row ranges assume `ratio_avg` has at least 18 rows.
	p1, p2, p3, p4 = map((1:6, 7:12, 13:18, 19:size(ratio_avg, 1))) do rows
	    picked_plot(year_df, ratio_avg[rows, :spelltype])
	end

	# Combine the four panels into one figure.
	plot(p1, p2, p3, p4)
No results found