Please review SQL before using the data:
One degree
CREATE TABLE tim.one_deg ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' AS
SELECT
floor(decimalLatitude) AS lat,
floor(decimalLongitude) AS lng,
count(*) AS total
FROM
Please review SQL before using the data:
One degree
CREATE TABLE tim.one_deg ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' AS
SELECT
floor(decimalLatitude) AS lat,
floor(decimalLongitude) AS lng,
count(*) AS total
FROM
CREATE TABLE tim.tom_export ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' AS | |
SELECT | |
CAST(decimalLatitude AS INT) AS lat, | |
CAST(decimalLongitude AS INT) AS lng, | |
count(*) AS total | |
FROM | |
prod_a.occurrence_hdfs | |
WHERE | |
hasGeospatialIssues=0 AND decimalLatitude IS NOT NULL AND decimalLatitude IS NOT NULL | |
GROUP BY |
SELECT | |
n.nid AS id, | |
f.filename AS filename, | |
CASE t.gr_resource_type_tid WHEN 895 THEN 'document' WHEN 987 THEN 'presentation' ELSE 'resource' END as type, | |
REPLACE(REPLACE(ifnull(n.title,''),'\n',' '),'\r',' ') AS title, | |
REPLACE(REPLACE(ifnull(bo.body_value,''),'\n',' '),'\r',' ') AS body, | |
REPLACE(REPLACE(ifnull(ab.gr_abstract_value,''),'\n',' '),'\r',' ') AS abstract, | |
REPLACE(REPLACE(ifnull(a.gr_author_value,''),'\n',' '),'\r',' ') AS author, | |
substring(da.gr_date_of_publication_value,0,10) AS publicationDate, | |
u.gr_url_value AS url, |
<!-- Redirect users to demo for a better experience --> | |
<form class="form-search content-search" action="https://demo.gbif.org/search" method="get" id="search-block-form--2" accept-charset="UTF-8"> | |
<div> | |
<div> | |
<h2 class="element-invisible">Search form</h2> | |
<div class="input-group"> | |
<input title="Search news items and information pages..." placeholder="Search news items and information pages..." class="form-control form-text" type="text" id="edit-search-block-form--4" name="q" value="" size="15" maxlength="128"> | |
<span class="glyphicon glyphicon-search bring-forward"></span> | |
</div> | |
<div class="form-actions form-wrapper form-group" id="edit-actions--2"> |
Exception in thread "main" java.lang.RuntimeException: java.lang.ClassNotFoundException: Class org.apache.hadoop.hbase.mapreduce.PatchedHFileOutputFormat2 not found | |
at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2199) | |
at org.apache.hadoop.mapreduce.task.JobContextImpl.getOutputFormatClass(JobContextImpl.java:232) | |
at org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJars(TableMapReduceUtil.java:829) | |
at org.apache.hadoop.hbase.mapreduce.PatchedHFileOutputFormat2.configureIncrementalLoad(PatchedHFileOutputFormat2.java:369) | |
at org.apache.hadoop.hbase.mapreduce.PatchedHFileOutputFormat2.configureIncrementalLoad(PatchedHFileOutputFormat2.java:335) | |
at org.apache.hadoop.hbase.mapreduce.PatchedHFileOutputFormat2.configureIncrementalLoad(PatchedHFileOutputFormat2.java:331) | |
at org.gbif.maps.spark.Configurations$.hfileOutputConfiguration(Configurations.scala:49) | |
at org.gbif.maps.spark.BackfillPoints$.build(BackfillPoints.scala:94) | |
at org.gbif.maps.spark.Backfill$.main(Backfil |
var srequest = require('sync-request'); | |
var fs = require('fs'); | |
// A very hacky script to issue an inventory request to each registered IPT (with reasonable timeout) and summarise the counts of the | |
// IPT opinion versus the GBIF.org index opinion. This only to get an idea of the extent of mismatch. | |
// NOTE: many fail due to https://github.com/gbif/ipt/issues/1344 | |
var res = srequest('GET', 'http://api.gbif.org/v1/installation?type=IPT_INSTALLATION&limit=10000'); | |
var json = JSON.parse(res.getBody()) |
val exportQuery = "SELECT " + fields.map("first(clean(" + _.toLowerCase + ")) AS " + _).mkString(",") + " FROM occ" | |
// Write a single parquet file of cleaned data per data resource | |
sqlContext.sql(exportQuery).write.partitionBy("dataresourceuid").format("parquet").save("/data/biocache-exports/transient/") | |
// The following is a workaround to circumvent the fact that the CSV writers in Spark 1.6 are not partitionable. | |
// get the dataresource keys | |
val resourceUids = sqlContext.sql("SELECT distinct dataresourceuid FROM occ").collect() |
-- Set the data provider keys that GBIF have: | |
UPDATE data_provider SET gbif_registry_key='c71f0513-c95a-4e98-abad-0c222b87ec66' WHERE id=1; | |
UPDATE data_provider SET gbif_registry_key='d98506d8-5ab1-4a26-9ef1-766bdb54304f' WHERE id=3; | |
UPDATE data_provider SET gbif_registry_key='3636faf8-c86a-42c5-9bc9-a2d4e6c0c891' WHERE id=4; | |
UPDATE data_provider SET gbif_registry_key='1849d721-3a33-4c50-9862-692df5351273' WHERE id=5; | |
UPDATE data_provider SET gbif_registry_key='0c167c76-464a-406e-8d0b-c6dd8314da18' WHERE id=6; | |
UPDATE data_provider SET gbif_registry_key='13dba6c4-e112-473d-a827-850256bf53c6' WHERE id=8; | |
UPDATE data_provider SET gbif_registry_key='d3dd95c3-93d4-4349-a13f-55abf89ad140' WHERE id=9; | |
UPDATE data_provider SET gbif_registry_key='98ba8467-5081-4d15-b168-32d893e18b76' WHERE id=10; | |
UPDATE data_provider SET gbif_registry_key='90da406e-9e67-4830-a480-befef56beedb' WHERE id=14; |
These were created with http://registry.als.scot/GBIF/* URLs recently, but seemingly don't exist in the NBN Atlas
GBIF key | Name | GUID |
---|---|---|
ed5d13f9-cca9-42b9-b681-3d50a1693865 | John Muir Trust - Plants, Bryophytes and Lichens recorded on the Sandwood Estate during 1998 & 2006. | GA000973 |
439c7428-4ec3-4925-8065-1a653a93fcf2 | John Muir Trust - Plants, Bryophytes and Lichens recorded on the Skye Estate during October 1995 & October 2004. | GA000969 |
2461b2e8-6bb6-4402-8b99-03b5d13885b4 | National Trust for Scotland - NTS Properties Sensitive Species Records 1800-2013 | GA001195 |
882369ec-9118-4d98-bed9-a44210a2e9cb | Lichens of Conservation Concern - Site Data, Wales | CCWJMP02 |
08895e67-911b-439e-9275-cd872452975e | Scottish Natural Heritage - Distribution of Scottish wildcats (Felis silvestris) in Scotland (2006-2008) & database extract (breeding and non-breeding records) | GA001142 |
2e649272-13e1-40d7-8e1e-e99bc85c9631 | Scottish Natural Heritage - Surveys of Najas flexilis (Slender naiad) in Scotland 2004 |
In GBIF but missing in collectory:
GBIF datasetKey | NBN GUID | Name |
---|---|---|
5700022f-8c24-4762-b15c-6c04cc0b02ed | GA000402 | Countryside Council for Wales - Welsh Lesser Horseshoe Bat Summer Roost Surveillance Database |
c9b0e882-fb04-4572-836e-cca306f2ff24 | GA000404 | Countryside Council for Wales - Important Welsh Bat Roosts Database (BATSITES-WALES) |
fbd8cfe2-db00-4276-b7bd-e7d9490d2e10 | GA000405 | Countryside Council for Wales - Bat Roosts Database- South Wales |
b6327887-f76c-4974-a467-d2856cf48bfe | GA000409 | Countryside Council for Wales - Bat Records - Montgomeryshire |
d5cc76e9-ccff-4759-b442-9917c1faaa88 | GA000417 | Countryside Council for Wales - Bat Roosts Database - Pembrokeshire |
c5b5c14a-3175-4e05-a662-13321911cdfc | GA000473 | Countryside Council for Wales - Dormouse Licence Return Data |