Created
July 28, 2020 21:23
-
-
Save simon-brooke/e64c7d190a4efe784a04d011e2597eeb to your computer and use it in GitHub Desktop.
Collect daily worldwide COVID 19 data from the European Centre for Disease Control, bung it into a local database, and report
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/awk -f | |
BEGIN { | |
FS=","; | |
OFS=", "; | |
print("delete from covid;"); | |
} | |
NR > 1 { | |
$1 = gensub( /(.*)/, "'\\1'", 1, $1); | |
$7 = gensub( /(.*)/, "'\\1'", 1, $7); | |
$8 = gensub( /(.*)/, "'\\1'", 1, $8); | |
$9 = gensub( /(.*)/, "'\\1'", 1, $9); | |
$11 = gensub( /(.*)/, "'\\1'", 1, $11); | |
if ($12 == "") { | |
$12 = 0; | |
} | |
if ($1 != "''" && length($8) == 4 && length($9) == 5 && $11 != "'Other'") { | |
printf("insert into covid values (%s);\n", $0); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- | |
-- PostgreSQL database dump | |
-- | |
-- Dumped from database version 10.12 (Ubuntu 10.12-0ubuntu0.18.04.1) | |
-- Dumped by pg_dump version 10.12 (Ubuntu 10.12-0ubuntu0.18.04.1) | |
SET statement_timeout = 0; | |
SET lock_timeout = 0; | |
SET idle_in_transaction_session_timeout = 0; | |
SET client_encoding = 'UTF8'; | |
SET standard_conforming_strings = on; | |
SELECT pg_catalog.set_config('search_path', '', false); | |
SET check_function_bodies = false; | |
SET xmloption = content; | |
SET client_min_messages = warning; | |
SET row_security = off; | |
-- | |
-- Name: plpgsql; Type: EXTENSION; Schema: -; Owner: | |
-- | |
CREATE EXTENSION IF NOT EXISTS plpgsql WITH SCHEMA pg_catalog; | |
-- | |
-- Name: EXTENSION plpgsql; Type: COMMENT; Schema: -; Owner: | |
-- | |
COMMENT ON EXTENSION plpgsql IS 'PL/pgSQL procedural language'; | |
SET default_tablespace = ''; | |
SET default_with_oids = false; | |
-- | |
-- Name: covid; Type: TABLE; Schema: public; Owner: simon | |
-- | |
CREATE TABLE IF NOT EXISTS public.covid ( | |
daterep character varying(10) NOT NULL, | |
day integer, | |
month integer, | |
year integer, | |
cases integer, | |
deaths integer, | |
country character varying(64), | |
iso3166_2 character(2) NOT NULL, | |
iso3166_3 character(3), | |
population integer, | |
continent character varying(16), | |
cc_per_100k double precision, | |
PRIMARY KEY(daterep, iso3166_2) | |
); | |
-- | |
-- Name: eu_states; Type: TABLE; Schema: public; Owner: simon | |
-- | |
CREATE TABLE IF NOT EXISTS public.eu_states ( | |
name character varying(32), | |
accession integer, | |
population integer, | |
area integer, | |
gdp integer, | |
gdp_per_cap real, | |
currency character varying(16), | |
gini real, | |
hdi real, | |
meps integer, | |
language character varying(16), | |
iso3166_2 character(2) NOT NULL PRIMARY KEY, | |
iso3166_3 character(3) | |
); | |
-- | |
-- Data for Name: eu_states; Type: TABLE DATA; Schema: public; Owner: simon | |
-- | |
INSERT INTO public.eu_states VALUES ('Austria ', 1995, 8792500, 83855, 437582, 53558, 'euro ', 29.1000004, 0.907999992, 18, 'German', 'AT', 'AUT'); | |
INSERT INTO public.eu_states VALUES ('Belgium ', 1957, 11365834, 30528, 534230, 49529, 'euro ', 33, 0.916000009, 21, 'Dutch', 'BE', 'BEL'); | |
INSERT INTO public.eu_states VALUES ('Bulgaria ', 2007, 7101859, 110994, 55824, 24595, 'lev ', 29.2000008, 0.813000023, 17, 'Bulgarian ', 'BG', 'BGR'); | |
INSERT INTO public.eu_states VALUES ('Croatia ', 2013, 4154213, 56594, 57073, 27729, 'kuna ', 29, 0.83099997, 11, 'Croatian ', 'HR', 'HRV'); | |
INSERT INTO public.eu_states VALUES ('Cyprus ', 2004, 854802, 9251, 23263, 41407, 'euro ', 31.2000008, 0.869000018, 6, 'Greek', 'CY', 'CYP'); | |
INSERT INTO public.eu_states VALUES ('Czech Republic ', 2004, 10467628, 78866, 205270, 38834, 'koruna ', 25.7999992, 0.888000011, 21, 'Czech ', 'CZ', 'CZE'); | |
INSERT INTO public.eu_states VALUES ('Denmark ', 1973, 5743947, 43075, 342362, 53882, 'krone ', 24.7000008, 0.92900002, 13, 'Danish ', 'DK', 'DNK'); | |
INSERT INTO public.eu_states VALUES ('Estonia ', 2004, 1315635, 45227, 26506, 35853, 'euro ', 36, 0.870999992, 6, 'Estonian ', 'EE', 'EST'); | |
INSERT INTO public.eu_states VALUES ('Finland ', 1995, 5577282, 338424, 272649, 47975, 'euro ', 26.8999996, 0.920000017, 13, 'Finnish', 'FI', 'FIN'); | |
INSERT INTO public.eu_states VALUES ('France ', 1957, 67024633, 632833, 2833687, 47223, 'euro ', 32.7000008, 0.901000023, 74, 'French', 'FR', 'FRA'); | |
INSERT INTO public.eu_states VALUES ('Germany ', 1957, 83149300, 357386, 3874437, 53567, 'euro ', 28.2999992, 0.93599999, 96, 'German', 'DE', 'DEU'); | |
INSERT INTO public.eu_states VALUES ('Greece ', 1981, 10757293, 131990, 237970, 30252, 'euro ', 34.2999992, 0.870000005, 21, 'Greek ', 'GR', 'GRC'); | |
INSERT INTO public.eu_states VALUES ('Hungary ', 2004, 9797561, 93030, 136989, 34047, 'forint ', 30, 0.838, 21, 'Hungarian ', 'HU', 'HUN'); | |
INSERT INTO public.eu_states VALUES ('Italy ', 1957, 61219113, 301338, 2147744, 40470, 'euro ', 36, 0.879999995, 73, 'Italian', 'IT', 'ITA'); | |
INSERT INTO public.eu_states VALUES ('Latvia ', 2004, 1950116, 64589, 31972, 31402, 'euro ', 35.7000008, 0.847000003, 8, 'Latvian ', 'LV', 'LVA'); | |
INSERT INTO public.eu_states VALUES ('Lithuania ', 2004, 2847904, 65200, 48288, 36701, 'euro ', 35.7999992, 0.85799998, 11, 'Lithuanian ', 'LT', 'LTU'); | |
INSERT INTO public.eu_states VALUES ('Luxembourg ', 1957, 589370, 2586, 65683, 108951, 'euro ', 30.7999992, 0.903999984, 6, 'French', 'LU', 'LUX'); | |
INSERT INTO public.eu_states VALUES ('Malta ', 2004, 440433, 316, 10514, 47405, 'euro ', 25.7999992, 0.878000021, 6, 'Maltese', 'MT', 'MLT'); | |
INSERT INTO public.eu_states VALUES ('Netherlands ', 1957, 17220721, 41543, 880716, 58341, 'euro ', 30.8999996, 0.930999994, 26, 'Dutch', 'NL', 'NLD'); | |
INSERT INTO public.eu_states VALUES ('Poland ', 2004, 37972964, 312685, 547894, 33891, 'złoty ', 34.9000015, 0.86500001, 51, 'Polish ', 'PL', 'POL'); | |
INSERT INTO public.eu_states VALUES ('Portugal ', 1986, 10291027, 92212, 340715, 33665, 'euro ', 32.0999985, 0.847000003, 21, 'Portuguese ', 'PT', 'PRT'); | |
INSERT INTO public.eu_states VALUES ('Ireland ', 1973, 4774833, 70273, 250814, 83399, 'euro ', 34.2999992, 0.938000023, 11, 'English', 'IE', 'IRL'); | |
INSERT INTO public.eu_states VALUES ('Romania ', 2007, 19638309, 238391, 199093, 27998, 'leu ', 31.5, 0.81099999, 32, 'Romanian ', 'RO', 'ROU'); | |
INSERT INTO public.eu_states VALUES ('Slovakia ', 2004, 5435343, 49035, 99869, 36640, 'euro ', 25.7999992, 0.855000019, 13, 'Slovak ', 'SK', 'SVK'); | |
INSERT INTO public.eu_states VALUES ('Slovenia ', 2004, 2065895, 20273, 49570, 38462, 'euro ', 31.2000008, 0.896000028, 8, 'Slovene ', 'SI', 'SVN'); | |
INSERT INTO public.eu_states VALUES ('Spain ', 1986, 46528966, 504030, 1406538, 41592, 'euro ', 32, 0.890999973, 54, 'Spanish', 'ES', 'ESP'); | |
INSERT INTO public.eu_states VALUES ('Sweden ', 1995, 10080000, 449964, 570591, 54628, 'krona ', 25, 0.933000028, 20, 'Swedish ', 'SE', 'SWE'); | |
-- | |
-- PostgreSQL database dump complete | |
-- |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# Pull today's Covid data from the European Centre for Disease Control, and | |
# report. | |
# NO WARRANTY. If it breaks, you get to keep all the bits. | |
# | |
# The `ssconvert` program is part of the Gnumeric spreadsheet package from | |
# http://www.gnumeric.org/. You should install this. You also need basic | |
# UN*X tools (awk, bash) as well as wget and postgres. | |
# | |
# The postgres database `covid` must exist before this script is run, and may | |
# be initialised using the file `covid.sql`: | |
# | |
# createdb covid | |
# psql covid < | |
TODAY=`date +"%Y-%m-%d"` | |
URL="https://www.ecdc.europa.eu/sites/default/files/documents/COVID-19-geographic-disbtribution-worldwide-${TODAY}.xlsx" | |
wget -O - $URL |\ | |
ssconvert -I Gnumeric_Excel:xlsx -T Gnumeric_stf:stf_csv fd://0 fd://1 |\ | |
awk -f COVID-19-geographic-disbtribution-worldwide.awk |\ | |
psql covid | |
# Edit `queries.sql` to report whatever data you're interested in. | |
psql covid < queries.sql |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- OK, this is a hack and I'm sure there are better ways | |
-- of doing it. But it works. | |
select now() - interval '7' day as lw, | |
date_part( 'month', now() - interval '7' day) as lwm, | |
date_part( 'day', now() - interval '7' day) as lwd | |
into temporary table last_week; | |
-- Bizarrely, on 28th July, the the European Centre for Disease Control gave | |
-- 'EL' as the iso3166_2 code for Greece instead of 'GR'. 'EL', according to | |
-- listings on the web, is unassigned. | |
update covid set iso3166_2 = 'GR' where iso3166_2 = 'EL'; | |
select 'EU States' as name, sum(deaths) as deaths | |
from covid, eu_states, last_week | |
where covid.iso3166_2 = eu_states.iso3166_2 | |
and covid.month = last_week.lwm | |
and covid.day > last_week.lwd; | |
select 'UK' as name, sum(deaths) as deaths | |
from covid, last_week | |
where covid.iso3166_2 = 'UK' | |
and covid.month = last_week.lwm | |
and covid.day > last_week.lwd; | |
select eu_states.name, sum(deaths) as deaths | |
from covid, eu_states, last_week | |
where covid.iso3166_2 = eu_states.iso3166_2 | |
and covid.month = last_week.lwm | |
and covid.day > last_week.lwd | |
group by eu_states.name | |
order by deaths; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment