Skip to content

Instantly share code, notes, and snippets.

@timrobertson100
Created October 23, 2018 20:39
Show Gist options
  • Save timrobertson100/325f624fab6af4c6ab9236c3c5aabebb to your computer and use it in GitHub Desktop.
Save timrobertson100/325f624fab6af4c6ab9236c3c5aabebb to your computer and use it in GitHub Desktop.
-- Session setup for the export statement that follows in this script.
-- Register the jars needed by this session. The codec class and the two UDF
-- classes named below presumably come from hadoop-compress, occurrence-hive
-- and brickhouse respectively (TODO confirm). occurrence-common and gbif-api
-- look like supporting dependencies of the UDF jar (TODO confirm).
ADD JAR /tmp/hadoop-compress-1.3-SNAPSHOT.jar;
ADD JAR /tmp/occurrence-hive-0.89-20181017.084448-7.jar;
ADD JAR /tmp/brickhouse-0.6.0.jar;
ADD JAR /tmp/occurrence-common-0.89-20181017.084442-7.jar;
ADD JAR /tmp/gbif-api-0.72-20181012.105547-3.jar;
-- Output compression: enable compressed query output and select the GBIF
-- D2Codec as the output codec. Sequence-file compression type is set to BLOCK.
-- NOTE(review): io.compression.codecs is set to D2Codec only, which replaces
-- any default codec list for this session. Confirm that is intended.
SET io.seqfile.compression.type=BLOCK;
SET mapred.output.compression.codec=org.gbif.hadoop.compress.d2.D2Codec;
SET io.compression.codecs=org.gbif.hadoop.compress.d2.D2Codec;
SET hive.exec.compress.output=true;
-- Make uat the current database. The statement below also qualifies its
-- table names with uat explicitly.
use uat;
-- Session-scoped UDFs used by the SELECT below: toISO8601 is applied to the
-- date columns, joinArray to the mediatype and issue columns.
CREATE TEMPORARY FUNCTION toISO8601 AS 'org.gbif.occurrence.hive.udf.ToISO8601UDF';
CREATE TEMPORARY FUNCTION joinArray AS 'brickhouse.udf.collect.JoinArrayUDF';
-- Create uat.tim4 from uat.occurrence_hdfs, restricted to a single datasetkey.
-- The table is stored as tab-delimited text with serialization.null.format
-- set to the empty string.
-- eventdate, dateidentified and lastinterpreted are passed through toISO8601
-- and keep their original column names.
-- mediatype and issue are collapsed to one string with joinArray, using an
-- escaped semicolon as the separator literal. A NULL value in either column
-- is emitted as an empty string instead of being passed to joinArray.
CREATE TABLE uat.tim4
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY '\t'
TBLPROPERTIES ("serialization.null.format"="")
AS
SELECT
  gbifid,
  datasetkey,
  occurrenceid,
  kingdom,
  phylum,
  class,
  order_,
  family,
  genus,
  species,
  infraspecificepithet,
  taxonrank,
  scientificname,
  countrycode,
  locality,
  publishingorgkey,
  decimallatitude,
  decimallongitude,
  coordinateuncertaintyinmeters,
  coordinateprecision,
  elevation,
  elevationaccuracy,
  depth,
  depthaccuracy,
  toISO8601(eventdate) AS eventdate,
  day,
  month,
  year,
  taxonkey,
  specieskey,
  basisofrecord,
  institutioncode,
  collectioncode,
  catalognumber,
  recordnumber,
  identifiedby,
  toISO8601(dateidentified) AS dateidentified,
  license,
  rightsholder,
  recordedby,
  typestatus,
  establishmentmeans,
  toISO8601(lastinterpreted) AS lastinterpreted,
  if(mediatype IS NULL, '', joinArray(mediatype, '\;')) AS mediatype,
  if(issue IS NULL, '', joinArray(issue, '\;')) AS issue
FROM uat.occurrence_hdfs
WHERE datasetkey = '292a71df-588b-48fa-9ab5-29ae868ba88c';
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment