Skip to content

Instantly share code, notes, and snippets.

@tpapp
Created February 7, 2017 21:12
Show Gist options
  • Select an option

  • Save tpapp/626c387f031f431f725db104cb8f6be2 to your computer and use it in GitHub Desktop.

Select an option

Save tpapp/626c387f031f431f725db104cb8f6be2 to your computer and use it in GitHub Desktop.
code for plotting (almost works)
using AMDB
using DataStructures
using IntervalSets
using Plots
using DataFrames # you may need to add these libraries
using StatPlots # with Pkg.add
# Load the records from the `.jls.gz` subsample file under the user's home directory.
# NOTE(review): `deserialize_gz` is not defined in this file — presumably it is
# exported by AMDB; confirm.
records = deserialize_gz(expanduser("~/research/AMDB/data/AMDB_subsample.jls.gz"))
plotly() # select the Plotly backend: plots appear in your browser; other Plots backends can be used instead
################################################################################
"""
    spell_durations_in_year(records, interval)

Accumulate, per spell status, the total width of the overlap between each spell's
interval and `interval`, over every spell found in `records`, and return the
resulting counter.

`records` is iterated as two-element items; the second element must have an
`AMP_spells` collection whose entries provide `interval` and `status`.
"""
function spell_durations_in_year(records, interval)
    totals = counter(AMP.Spell)
    for (_, entry) in records, spell in entry.AMP_spells
        overlap = intersect(spell.interval, interval)
        # The width of the overlap is converted to a plain `Int` before it is added
        # to the running total for this spell's status.
        # NOTE(review): the original author's comment says the unit is days — confirm.
        duration = convert(Int, IntervalSets.width(overlap))
        push!(totals, spell.status, duration)
    end
    return totals
end
"""
Normalized total durations by year, using `spell_durations_in_year`.
"""
function year_shares(records, year)
    # Closed interval running from 1 January to 31 December of `year`.
    whole_year = Date(year, 1, 1)..Date(year, 12, 31)
    durations = spell_durations_in_year(records, whole_year)
    # NOTE(review): `normalize` for a counter is not defined in this file; the
    # second argument `1` is passed through unchanged — confirm its meaning.
    return normalize(durations, 1)
end
"""
    extract_spell_statistics(year_statistics, spelltype)

Look up `spelltype` in each element of `year_statistics` and return the values
as an array, in the same order. Elements that have no entry for `spelltype`
contribute `0`.
"""
function extract_spell_statistics(year_statistics, spelltype)
    return collect(get(stats, spelltype, 0) for stats in year_statistics)
end
# Calendar years covered by the analysis.
all_years = 1972:2007
# One `year_shares` result per year, in the same order as `all_years`.
year_stats = [year_shares(records, year) for year in all_years]
# Sanity check that the pipeline works: plot the yearly values of a single spell
# type (`AMP.other_insured_time`) against the years.
plot(all_years, extract_spell_statistics(year_stats, AMP.other_insured_time))
# Collect the yearly statistics into a long-format DataFrame with one row per
# (year, spelltype) pair and the columns `year`, `spelltype` and `ratio`.
#
# The columns are built with comprehensions rather than by `push!`-ing into
# untyped `[]` vectors, so each column gets the concrete element type of its
# values instead of `Any`.
year_df = let
    # One (year, spelltype, ratio) tuple per entry of each year's statistics, in
    # the order a nested loop over `zip(all_years, year_stats)` and then over the
    # entries of each statistic would visit them.
    rows = [(year, spelltype, ratio)
            for (year, stats) in zip(all_years, year_stats)
            for (spelltype, ratio) in stats]
    DataFrame(year = [row[1] for row in rows],
              spelltype = [row[2] for row in rows],
              ratio = [row[3] for row in rows])
end
# Plot `ratio` against `year` with one series per spell type. With every spell
# type in a single figure the result is rather crowded.
plot(year_df, :year, :ratio, group=:spelltype)
# To make it simpler, aggregate `year_df` by spell type using `mean`, then sort
# the result by its `:ratio_mean` column.
# NOTE(review): `:ratio_mean` is presumably the name `aggregate` gives to the mean
# of `:ratio`, and the sort is presumably ascending (no order is given) — confirm.
ratio_avg = sort(aggregate(year_df, :spelltype, mean), cols=:ratio_mean)
"""
    pick_spelltypes(data, spelltypes)

Return the rows of `data` whose `:spelltype` entry is an element of `spelltypes`.
"""
function pick_spelltypes(data, spelltypes)
    # Boolean mask with one entry per row of `data`.
    wanted = collect(in(entry, spelltypes) for entry in data[:spelltype])
    return data[wanted, :]
end
"""
    picked_plot(data, spelltypes)

Plot `:ratio` against `:year`, grouped by `:spelltype`, using only the rows of
`data` selected by `pick_spelltypes(data, spelltypes)`.
"""
function picked_plot(data, spelltypes)
    subset = pick_spelltypes(data, spelltypes)
    return plot(subset, :year, :ratio, group = :spelltype)
end
# Split the spell types, in the row order of `ratio_avg`, into four groups (rows
# 1-6, 7-12, 13-18, and 19 through the last row) and draw one plot per group.
# NOTE(review): the hard-coded ranges assume `ratio_avg` has more than 18 rows — confirm.
p1 = picked_plot(year_df, ratio_avg[1:6, :spelltype])
p2 = picked_plot(year_df, ratio_avg[7:12, :spelltype])
p3 = picked_plot(year_df, ratio_avg[13:18, :spelltype])
p4 = picked_plot(year_df, ratio_avg[19:end, :spelltype])
# Pass the four plots to `plot` together so they are shown side by side.
# NOTE(review): presumably Plots lays them out as subplots of one figure — confirm.
plot(p1, p2, p3, p4)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment