Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save simon-brooke/e64c7d190a4efe784a04d011e2597eeb to your computer and use it in GitHub Desktop.
Save simon-brooke/e64c7d190a4efe784a04d011e2597eeb to your computer and use it in GitHub Desktop.
Collect daily worldwide COVID 19 data from the European Centre for Disease Control, bung it into a local database, and report
#!/bin/awk -f
BEGIN {
FS=",";
OFS=", ";
print("delete from covid;");
}
NR > 1 {
$1 = gensub( /(.*)/, "'\\1'", 1, $1);
$7 = gensub( /(.*)/, "'\\1'", 1, $7);
$8 = gensub( /(.*)/, "'\\1'", 1, $8);
$9 = gensub( /(.*)/, "'\\1'", 1, $9);
$11 = gensub( /(.*)/, "'\\1'", 1, $11);
if ($12 == "") {
$12 = 0;
}
if ($1 != "''" && length($8) == 4 && length($9) == 5 && $11 != "'Other'") {
printf("insert into covid values (%s);\n", $0);
}
}
--
-- PostgreSQL database dump
--
-- Dumped from database version 10.12 (Ubuntu 10.12-0ubuntu0.18.04.1)
-- Dumped by pg_dump version 10.12 (Ubuntu 10.12-0ubuntu0.18.04.1)
SET statement_timeout = 0;
SET lock_timeout = 0;
SET idle_in_transaction_session_timeout = 0;
SET client_encoding = 'UTF8';
SET standard_conforming_strings = on;
SELECT pg_catalog.set_config('search_path', '', false);
SET check_function_bodies = false;
SET xmloption = content;
SET client_min_messages = warning;
SET row_security = off;
--
-- Name: plpgsql; Type: EXTENSION; Schema: -; Owner:
--
CREATE EXTENSION IF NOT EXISTS plpgsql WITH SCHEMA pg_catalog;
--
-- Name: EXTENSION plpgsql; Type: COMMENT; Schema: -; Owner:
--
COMMENT ON EXTENSION plpgsql IS 'PL/pgSQL procedural language';
SET default_tablespace = '';
SET default_with_oids = false;
--
-- Name: covid; Type: TABLE; Schema: public; Owner: simon
--
CREATE TABLE IF NOT EXISTS public.covid (
daterep character varying(10) NOT NULL,
day integer,
month integer,
year integer,
cases integer,
deaths integer,
country character varying(64),
iso3166_2 character(2) NOT NULL,
iso3166_3 character(3),
population integer,
continent character varying(16),
cc_per_100k double precision,
PRIMARY KEY(daterep, iso3166_2)
);
--
-- Name: eu_states; Type: TABLE; Schema: public; Owner: simon
--
CREATE TABLE IF NOT EXISTS public.eu_states (
name character varying(32),
accession integer,
population integer,
area integer,
gdp integer,
gdp_per_cap real,
currency character varying(16),
gini real,
hdi real,
meps integer,
language character varying(16),
iso3166_2 character(2) NOT NULL PRIMARY KEY,
iso3166_3 character(3)
);
--
-- Data for Name: eu_states; Type: TABLE DATA; Schema: public; Owner: simon
--
INSERT INTO public.eu_states VALUES ('Austria ', 1995, 8792500, 83855, 437582, 53558, 'euro ', 29.1000004, 0.907999992, 18, 'German', 'AT', 'AUT');
INSERT INTO public.eu_states VALUES ('Belgium ', 1957, 11365834, 30528, 534230, 49529, 'euro ', 33, 0.916000009, 21, 'Dutch', 'BE', 'BEL');
INSERT INTO public.eu_states VALUES ('Bulgaria ', 2007, 7101859, 110994, 55824, 24595, 'lev ', 29.2000008, 0.813000023, 17, 'Bulgarian ', 'BG', 'BGR');
INSERT INTO public.eu_states VALUES ('Croatia ', 2013, 4154213, 56594, 57073, 27729, 'kuna ', 29, 0.83099997, 11, 'Croatian ', 'HR', 'HRV');
INSERT INTO public.eu_states VALUES ('Cyprus ', 2004, 854802, 9251, 23263, 41407, 'euro ', 31.2000008, 0.869000018, 6, 'Greek', 'CY', 'CYP');
INSERT INTO public.eu_states VALUES ('Czech Republic ', 2004, 10467628, 78866, 205270, 38834, 'koruna ', 25.7999992, 0.888000011, 21, 'Czech ', 'CZ', 'CZE');
INSERT INTO public.eu_states VALUES ('Denmark ', 1973, 5743947, 43075, 342362, 53882, 'krone ', 24.7000008, 0.92900002, 13, 'Danish ', 'DK', 'DNK');
INSERT INTO public.eu_states VALUES ('Estonia ', 2004, 1315635, 45227, 26506, 35853, 'euro ', 36, 0.870999992, 6, 'Estonian ', 'EE', 'EST');
INSERT INTO public.eu_states VALUES ('Finland ', 1995, 5577282, 338424, 272649, 47975, 'euro ', 26.8999996, 0.920000017, 13, 'Finnish', 'FI', 'FIN');
INSERT INTO public.eu_states VALUES ('France ', 1957, 67024633, 632833, 2833687, 47223, 'euro ', 32.7000008, 0.901000023, 74, 'French', 'FR', 'FRA');
INSERT INTO public.eu_states VALUES ('Germany ', 1957, 83149300, 357386, 3874437, 53567, 'euro ', 28.2999992, 0.93599999, 96, 'German', 'DE', 'DEU');
INSERT INTO public.eu_states VALUES ('Greece ', 1981, 10757293, 131990, 237970, 30252, 'euro ', 34.2999992, 0.870000005, 21, 'Greek ', 'GR', 'GRC');
INSERT INTO public.eu_states VALUES ('Hungary ', 2004, 9797561, 93030, 136989, 34047, 'forint ', 30, 0.838, 21, 'Hungarian ', 'HU', 'HUN');
INSERT INTO public.eu_states VALUES ('Italy ', 1957, 61219113, 301338, 2147744, 40470, 'euro ', 36, 0.879999995, 73, 'Italian', 'IT', 'ITA');
INSERT INTO public.eu_states VALUES ('Latvia ', 2004, 1950116, 64589, 31972, 31402, 'euro ', 35.7000008, 0.847000003, 8, 'Latvian ', 'LV', 'LVA');
INSERT INTO public.eu_states VALUES ('Lithuania ', 2004, 2847904, 65200, 48288, 36701, 'euro ', 35.7999992, 0.85799998, 11, 'Lithuanian ', 'LT', 'LTU');
INSERT INTO public.eu_states VALUES ('Luxembourg ', 1957, 589370, 2586, 65683, 108951, 'euro ', 30.7999992, 0.903999984, 6, 'French', 'LU', 'LUX');
INSERT INTO public.eu_states VALUES ('Malta ', 2004, 440433, 316, 10514, 47405, 'euro ', 25.7999992, 0.878000021, 6, 'Maltese', 'MT', 'MLT');
INSERT INTO public.eu_states VALUES ('Netherlands ', 1957, 17220721, 41543, 880716, 58341, 'euro ', 30.8999996, 0.930999994, 26, 'Dutch', 'NL', 'NLD');
INSERT INTO public.eu_states VALUES ('Poland ', 2004, 37972964, 312685, 547894, 33891, 'złoty ', 34.9000015, 0.86500001, 51, 'Polish ', 'PL', 'POL');
INSERT INTO public.eu_states VALUES ('Portugal ', 1986, 10291027, 92212, 340715, 33665, 'euro ', 32.0999985, 0.847000003, 21, 'Portuguese ', 'PT', 'PRT');
INSERT INTO public.eu_states VALUES ('Ireland ', 1973, 4774833, 70273, 250814, 83399, 'euro ', 34.2999992, 0.938000023, 11, 'English', 'IE', 'IRL');
INSERT INTO public.eu_states VALUES ('Romania ', 2007, 19638309, 238391, 199093, 27998, 'leu ', 31.5, 0.81099999, 32, 'Romanian ', 'RO', 'ROU');
INSERT INTO public.eu_states VALUES ('Slovakia ', 2004, 5435343, 49035, 99869, 36640, 'euro ', 25.7999992, 0.855000019, 13, 'Slovak ', 'SK', 'SVK');
INSERT INTO public.eu_states VALUES ('Slovenia ', 2004, 2065895, 20273, 49570, 38462, 'euro ', 31.2000008, 0.896000028, 8, 'Slovene ', 'SI', 'SVN');
INSERT INTO public.eu_states VALUES ('Spain ', 1986, 46528966, 504030, 1406538, 41592, 'euro ', 32, 0.890999973, 54, 'Spanish', 'ES', 'ESP');
INSERT INTO public.eu_states VALUES ('Sweden ', 1995, 10080000, 449964, 570591, 54628, 'krona ', 25, 0.933000028, 20, 'Swedish ', 'SE', 'SWE');
--
-- PostgreSQL database dump complete
--
#!/bin/bash
# Pull today's Covid data from the European Centre for Disease Control, and
# report.
# NO WARRANTY. If it breaks, you get to keep all the bits.
#
# The `ssconvert` program is part of the Gnumeric spreadsheet package from
# http://www.gnumeric.org/. You should install this. You also need basic
# UN*X tools (awk, bash) as well as wget and postgres.
#
# The postgres database `covid` must exist before this script is run, and may
# be initialised using the file `covid.sql`:
#
# createdb covid
# psql covid <
TODAY=`date +"%Y-%m-%d"`
URL="https://www.ecdc.europa.eu/sites/default/files/documents/COVID-19-geographic-disbtribution-worldwide-${TODAY}.xlsx"
wget -O - $URL |\
ssconvert -I Gnumeric_Excel:xlsx -T Gnumeric_stf:stf_csv fd://0 fd://1 |\
awk -f COVID-19-geographic-disbtribution-worldwide.awk |\
psql covid
# Edit `queries.sql` to report whatever data you're interested in.
psql covid < queries.sql
-- OK, this is a hack and I'm sure there are better ways
-- of doing it. But it works.
select now() - interval '7' day as lw,
date_part( 'month', now() - interval '7' day) as lwm,
date_part( 'day', now() - interval '7' day) as lwd
into temporary table last_week;
-- Bizarrely, on 28th July, the the European Centre for Disease Control gave
-- 'EL' as the iso3166_2 code for Greece instead of 'GR'. 'EL', according to
-- listings on the web, is unassigned.
update covid set iso3166_2 = 'GR' where iso3166_2 = 'EL';
select 'EU States' as name, sum(deaths) as deaths
from covid, eu_states, last_week
where covid.iso3166_2 = eu_states.iso3166_2
and covid.month = last_week.lwm
and covid.day > last_week.lwd;
select 'UK' as name, sum(deaths) as deaths
from covid, last_week
where covid.iso3166_2 = 'UK'
and covid.month = last_week.lwm
and covid.day > last_week.lwd;
select eu_states.name, sum(deaths) as deaths
from covid, eu_states, last_week
where covid.iso3166_2 = eu_states.iso3166_2
and covid.month = last_week.lwm
and covid.day > last_week.lwd
group by eu_states.name
order by deaths;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment