Skip to content

Instantly share code, notes, and snippets.

@IvanIvanoff
Last active June 26, 2021 23:11
Show Gist options
  • Save IvanIvanoff/061b9b49d4a334b7d3100feebf6f9f61 to your computer and use it in GitHub Desktop.
Save IvanIvanoff/061b9b49d4a334b7d3100feebf6f9f61 to your computer and use it in GitHub Desktop.
analyze csv
Mix.install([{:csv, "~> 2.4"}])
# Converts a zero-based column index into its spreadsheet-style letter name
# (0 -> "A", 25 -> "Z", 26 -> "AA", 51 -> "AZ", 52 -> "BA", ..., 701 -> "ZZ").
#
# Indices 0..25 produce a single letter. Indices 26..701 produce two letters:
# every run of 26 consecutive indices shares one leading letter, chosen by
# `div(num, 26) - 1`, while `rem(num, 26)` chooses the trailing letter.
# Any other argument (negative, non-integer, or past "ZZ") raises a
# RuntimeError.
num_to_column = fn
  num when num in 0..25 ->
    <<(?A + num)::utf8>>

  num when num in 26..701 ->
    <<(?A + div(num, 26) - 1)::utf8, (?A + rem(num, 26))::utf8>>

  _other ->
    raise("Boom")
end

# Reverse lookup table: column name => zero-based index, for indices 0..100.
column_to_num_map = Map.new(0..100, fn num -> {num_to_column.(num), num} end)
# Decode "data.csv" and keep only the first 1000 decoded rows.
data =
  "data.csv"
  |> File.stream!()
  |> CSV.decode!()
  |> Enum.take(1000)

# Keeps the rows whose cell at index 50 equals `label`.
# NOTE(review): index 50 presumably holds the respondent's gender - confirm
# against the questionnaire layout.
rows_labelled = fn rows, label ->
  Enum.filter(rows, fn row -> Enum.at(row, 50) == label end)
end

# Buckets rows by the cell at index 51, then moves whatever was collected
# under "46-68" into the "46-72" bucket (placed in front of the rows already
# there). The "46-72" bucket is created even when both buckets were empty.
bucket_rows = fn rows ->
  {legacy_rows, buckets} =
    rows
    |> Enum.group_by(fn row -> Enum.at(row, 51) end)
    |> Map.pop("46-68", [])

  Map.update(buckets, "46-72", legacy_rows, fn existing -> legacy_rows ++ existing end)
end

women = rows_labelled.(data, "Жена")
men = rows_labelled.(data, "Мъж")
women_age_groups = bucket_rows.(women)
men_age_groups = bucket_rows.(men)
# Zero-based positions of the columns whose values are tallied.
#
# Two-letter column names are resolved through `column_to_num_map`;
# `Map.fetch!/2` makes an unknown name fail here instead of yielding a `nil`
# position that would only blow up later inside `Enum.at/2`.
two_letter_columns = ~w(AE AF AG AH AI AJ AL AM AN AO AP AQ AS AT AU AV AW AX)
two_letter_columns_positions = Enum.map(two_letter_columns, &Map.fetch!(column_to_num_map, &1))

# Single-letter columns, in alphabetical order. The entry between V and Y used
# to be a second `?H`, which reported column H twice and never reported
# column X; it is `?X` now.
# NOTE(review): the selected columns otherwise come in runs of six separated
# by one skipped column (C-H, J-O, Q-V, AE-AJ, AL-AQ, AS-AX); the run starting
# at X stops at Z, so AA, AB and AC may be missing - confirm against the
# questionnaire.
single_letter_columns_positions =
  [?C, ?D, ?E, ?F, ?G, ?H, ?J, ?K, ?L, ?M, ?N, ?O, ?Q, ?R, ?S, ?T, ?U, ?V, ?X, ?Y, ?Z]
  |> Enum.map(&(&1 - ?A))

column_positions = single_letter_columns_positions ++ two_letter_columns_positions
# Normalizes a raw cell value into a tally key:
#   * the empty string becomes the label "Без отговор";
#   * a string that is entirely an integer literal becomes that integer;
#   * any other string, and any non-string value, is returned unchanged.
maybe_int = fn value ->
  cond do
    value == "" ->
      "Без отговор"

    is_binary(value) ->
      # Only accept the parse when nothing is left over after the number.
      case Integer.parse(value) do
        {int, ""} -> int
        _no_clean_parse -> value
      end

    true ->
      value
  end
end
# Prints, for every group in `map`, a header line with the group's row count
# followed by one line per tallied column showing how often each value occurs.
#
#   map  - group label => list of rows (each row a list of cells)
#   name - label printed in front of every group header
#
# Returns a list with one `:ok` per group.
print_stats = fn map, name ->
  for {age_group, list} <- map do
    IO.puts("#{name} #{age_group} (Общ брой: #{length(list)})")

    Enum.each(column_positions, fn column ->
      # Normalize each cell BEFORE counting so that raw values which normalize
      # to the same key (e.g. "1" and "01") are summed rather than one count
      # overwriting the other.
      counted = Enum.frequencies_by(list, fn row -> maybe_int.(Enum.at(row, column)) end)

      # Answers 1..5 are always shown, with a zero count when nobody chose them.
      freq = Map.merge(%{1 => 0, 2 => 0, 3 => 0, 4 => 0, 5 => 0}, counted)
      IO.puts(" Колона #{num_to_column.(column)}: #{inspect(freq)}")
    end)
  end
end
# Builds the CSV form of the statistics: one record per (group, column) pair,
# laid out as
#
#   [name, group label, column name, count of 1, count of 2, ..., count of 5]
#
#   map  - group label => list of rows (each row a list of cells)
#   name - value placed in the first field of every record
#
# Returns the result of `CSV.encode/1` over those records.
stats_to_csv = fn map, name ->
  records =
    for {age_group, list} <- map, column <- column_positions do
      # Normalize each cell BEFORE counting so that raw values which normalize
      # to the same key (e.g. "1" and "01") are summed rather than one count
      # overwriting the other.
      counted = Enum.frequencies_by(list, fn row -> maybe_int.(Enum.at(row, column)) end)

      # Guarantee a count (possibly zero) for each of the answers 1..5.
      freq = Map.merge(%{1 => 0, 2 => 0, 3 => 0, 4 => 0, 5 => 0}, counted)
      [name, age_group, num_to_column.(column), freq[1], freq[2], freq[3], freq[4], freq[5]]
    end

  CSV.encode(records)
end
# Print the human-readable report for both groups, separated by blank lines.
print_stats.(women_age_groups, "Жени")
IO.puts("\n")
print_stats.(men_age_groups, "Мъже")

# Write the CSV reports. The encoded output is materialized in full (it used
# to be cut off after 1000 records), and `File.write!/2` raises when a file
# cannot be written instead of the failure being silently discarded.
csv = stats_to_csv.(women_age_groups, "Жени") |> Enum.to_list()
File.write!("new_data_women.csv", csv)
csv = stats_to_csv.(men_age_groups, "Мъже") |> Enum.to_list()
File.write!("new_data_men.csv", csv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment