Skip to content

Instantly share code, notes, and snippets.

-- point compositae children to asteraceae
UPDATE name_usage set parent_fk=3065 where dataset_key=nubKey() and parent_fk=6070956;
-- point compositae descendants to asteraceae
UPDATE name_usage set family_fk=3065 where dataset_key=nubKey() and family_fk=6070956;
-- update asteraceae usage
UPDATE name_usage set num_descendants=num_descendants+1936 where id=3065;
UPDATE name_usage_metrics set count_children=count_children+573, count_g=count_g+573, count_s=count_s+1351 where id=3065;
-- update compositae usage
UPDATE name_usage set parent_fk=3065, is_synonym=true, status='SYNONYM', family_fk=3065, num_descendants=0 where id=6070956;
UPDATE name_usage_metrics set count_children=0, count_synonyms=0, count_p=0, count_c=0, count_o=0, count_f=0, count_g=0, count_sg=0, count_s=0 where id=6070956;
@mdoering
mdoering / taxa.tsv
Created October 7, 2016 08:02
Taxonomic CSV example using Darwin Core terms
taxonID parentNameUsageID acceptedNameUsageID originalNameUsageID taxonRank taxonomicStatus scientificName nameAccordingTo namePublishedInYear namePublishedIn namePublishedInID nomenclaturalStatus dc:references typification taxonRemarks
k1 kingdom Animalia
k2 kingdom Plantae
k3 kingdom Fungi
k4 kingdom Protozoa
k5 kingdom Chromista
p1 k1 phylum Arthropoda
p2 k1 phylum Mollusca
p3 k3 phylum Ascomycota
p4 k1 phylum Ctenophora
@mdoering
mdoering / explain download.sql
Created January 24, 2017 12:52
explain download
prod_b_registry=# explain analyze SELECT download_key,dataset_key,dataset_title,dataset_doi,dataset_citation,number_records,
prod_b_registry-# key,doi,license,filter,status,download_link,size,total_records,notification_addresses,created_by,send_notification,format,created
prod_b_registry-# FROM dataset_occurrence_download JOIN occurrence_download ON download_key = key
prod_b_registry-# WHERE dataset_key = 'f5de707d-eba1-4f1a-b1df-1a7fa27b1bc7'
prod_b_registry-# ORDER BY created DESC, key
prod_b_registry-# LIMIT 10 OFFSET 27990;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=153436.95..153436.97 rows=10 width=625) (actual time=25626.880..25626.885 rows=10 loops=1)
-> Sort
@mdoering
mdoering / backbone-2017-02.md
Created February 27, 2017 11:00
GBIF Backbone - February 2017 Update

GBIF Backbone - February 2017 Update

We are happy to annouce that a new GBIF Backbone just went live, available also as an improved Darwin Core Archive for download. Here are some facts highlighting the important changes.

New source datasets

Apart from continously updated source like the Catalog of Life or WoRMS here are the new datasets we used as a source to build the backbone.

@mdoering
mdoering / create_occ_extensions_hbase.hsql
Created March 1, 2017 11:12
Create occ_extensions_hbase
USE mdoering;
-- Base HBase backed table
CREATE EXTERNAL TABLE IF NOT EXISTS mdoering.occ_extensions_hbase (
gbifid INT,
dataset_key STRING,
protocol STRING,
ac_multimedia STRING,
gbif_description STRING,
gbif_distribution STRING,
eol_document STRING,
@mdoering
mdoering / v_nub_families.sql
Last active March 3, 2017 22:03
Create CLB v_nub_families view
-- header row:
-- id family status num_descendants accepted_key accepted order_key order class_key class phylum_key phylum kingdom_key kingdom
CREATE VIEW v_nub_families AS
SELECT f.id, fn.scientific_name as family, f.status, f.num_descendants,
a.id as accepted_key, an.scientific_name as accepted,
f.order_fk as order_key, orn.scientific_name as "order",
f.class_fk as class_key, cn.scientific_name as "class",
f.phylum_fk as phylum_key, pn.scientific_name as phylum,
f.kingdom_fk as kingdom_key, kn.scientific_name as kingdom
@mdoering
mdoering / Sciname.java
Last active May 24, 2017 12:41
sciname regex
static class Name {
public int key;
public int nameKey;
public String rank;
public String nameType;
public String canonicalName;
public String authorship;
public String scientificName;
}
@mdoering
mdoering / doc.md
Last active October 23, 2017 13:53
Testing intra document links in github flavored markdown
@mdoering
mdoering / partition.sql
Created June 13, 2018 13:28
Trying Postgres partitioning for CLB
DROP SCHEMA PUBLIC CASCADE;
CREATE SCHEMA PUBLIC;
CREATE TABLE dataset (
key serial PRIMARY KEY,
title TEXT NOT NULL,
created TIMESTAMP WITHOUT TIME ZONE DEFAULT NOW()
);
CREATE TABLE verbatim (
@mdoering
mdoering / index.sql
Created August 30, 2018 11:01
Rough comparison of query performance of int/text columns with hash index and partitioned tables
-- surrogate & text keys
create table ni (
key serial primary key,
id text UNIQUE,
dataset_key int,
name text
);
create table ti (
key serial primary key,