Created
April 9, 2026 12:28
-
-
Save frafra/b9df1e46b9e7d4134814bc167eaa8185 to your computer and use it in GitHub Desktop.
GBIF backbone FTS with DuckDB
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| attach database 'backbone.db' as backbone (read_only); | |
| use backbone; | |
| select taxonID, canonicalName, scientificName, fts_main_taxon.match_bm25(taxonID, 'gulo gulo') as score | |
| from taxon | |
| where score is not null and taxonomicStatus = 'accepted' | |
| order by score desc; | |
| select taxonID, canonicalName, taxonomicStatus, vernacularName, fts_main_vernacular_name_no.match_bm25(vernacularID, 'fjellrev') as score | |
| from vernacular_name_no | |
| join taxon using (taxonID) | |
| where score is not null | |
| order by score desc; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| INSTALL zipfs FROM community; LOAD zipfs; | |
| INSTALL cache_httpfs FROM community; LOAD cache_httpfs; | |
| CREATE TABLE taxon AS | |
| FROM 'zip://https://ipt.artsdatabanken.no/archive.do?r=artsnavnebase/Taxon.tsv' | |
| ; | |
| CREATE INDEX taxon_taxonid ON taxon (taxonID); | |
| PRAGMA create_fts_index( | |
| 'taxon', 'taxonID', 'canonicalName' | |
| ); | |
| CREATE TABLE vernacular_name AS | |
| SELECT DISTINCT CONCAT_WS('|', taxonID, language, vernacularName) AS vernacularID, * | |
| FROM 'zip://https://hosted-datasets.gbif.org/datasets/backbone/current/backbone.zip/VernacularName.tsv' | |
| ; | |
| CREATE TABLE vernacular_name_no AS | |
| FROM vernacular_name | |
| WHERE language = 'no' | |
| ; | |
| CREATE INDEX vernacular_name_taxonid ON vernacular_name (taxonID); | |
| CREATE INDEX vernacular_name_language ON vernacular_name (language); | |
| PRAGMA create_fts_index( | |
| 'vernacular_name_no', 'vernacularID', 'vernacularName', stemmer='norwegian' | |
| ); | |
| DROP TABLE vernacular_name_no; | |
| CREATE VIEW vernacular_name_no AS | |
| FROM vernacular_name | |
| WHERE language = 'no' | |
| ; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment