Created
October 23, 2018 20:39
-
-
Save timrobertson100/325f624fab6af4c6ab9236c3c5aabebb to your computer and use it in GitHub Desktop.
Example for C4 to run the same download as https://www.gbif-uat.org/occurrence/download/0000122-180925135249949
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Session setup for the download example.
-- Registers the jars used below from /tmp on the machine running the Hive session.
-- NOTE(review): snapshot jar versions are pinned by file name - confirm they match
-- the versions deployed in the target environment before running.
ADD JAR /tmp/hadoop-compress-1.3-SNAPSHOT.jar;
ADD JAR /tmp/occurrence-hive-0.89-20181017.084448-7.jar;
ADD JAR /tmp/brickhouse-0.6.0.jar;
ADD JAR /tmp/occurrence-common-0.89-20181017.084442-7.jar;
ADD JAR /tmp/gbif-api-0.72-20181012.105547-3.jar;

-- Output compression settings. Query output is compressed with the D2Codec class
-- from the org.gbif.hadoop.compress.d2 package (presumably provided by the
-- hadoop-compress jar added above - TODO confirm).
-- NOTE(review): io.compression.codecs is set to D2Codec only, which replaces any
-- previously configured codec list for this session - confirm that is intended.
SET io.seqfile.compression.type=BLOCK;
SET mapred.output.compression.codec=org.gbif.hadoop.compress.d2.D2Codec;
SET io.compression.codecs=org.gbif.hadoop.compress.d2.D2Codec;
SET hive.exec.compress.output=true;

-- Work in the uat database.
USE uat;

-- Session-scoped UDFs referenced by the CREATE TABLE ... AS SELECT statement.
-- toISO8601 is applied to the date columns and joinArray to the array columns.
CREATE TEMPORARY FUNCTION toISO8601 AS 'org.gbif.occurrence.hive.udf.ToISO8601UDF';
CREATE TEMPORARY FUNCTION joinArray AS 'brickhouse.udf.collect.JoinArrayUDF';
-- Materialises the occurrence records of one dataset into uat.tim4 as
-- tab-delimited text, with NULL values serialised as empty strings
-- (serialization.null.format is set to the empty string).
-- Date columns are passed through toISO8601. The mediatype and issue columns
-- are flattened with joinArray using a backslash-escaped semicolon separator,
-- and a NULL array yields an empty string.
-- NOTE(review): the backslash in the separator literal is presumably there so
-- the Hive CLI does not treat the semicolon as a statement terminator - confirm
-- against the client used to run this script.
-- NOTE(review): the statement fails if uat.tim4 already exists. Drop the table
-- first or pick a new name when re-running.
CREATE TABLE uat.tim4 ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
TBLPROPERTIES ("serialization.null.format"="")
AS SELECT
    gbifid,
    datasetkey,
    occurrenceid,
    kingdom,
    phylum,
    class,
    order_,
    family,
    genus,
    species,
    infraspecificepithet,
    taxonrank,
    scientificname,
    countrycode,
    locality,
    publishingorgkey,
    decimallatitude,
    decimallongitude,
    coordinateuncertaintyinmeters,
    coordinateprecision,
    elevation,
    elevationaccuracy,
    depth,
    depthaccuracy,
    toISO8601(eventdate) AS eventdate,
    day,
    month,
    year,
    taxonkey,
    specieskey,
    basisofrecord,
    institutioncode,
    collectioncode,
    catalognumber,
    recordnumber,
    identifiedby,
    toISO8601(dateidentified) AS dateidentified,
    license,
    rightsholder,
    recordedby,
    typestatus,
    establishmentmeans,
    toISO8601(lastinterpreted) AS lastinterpreted,
    if(mediatype IS NULL,'',joinArray(mediatype,'\;')) AS mediatype,
    if(issue IS NULL,'',joinArray(issue,'\;')) AS issue
FROM uat.occurrence_hdfs
WHERE datasetkey = '292a71df-588b-48fa-9ab5-29ae868ba88c';
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment