This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Coordinator actions still in WAITING status for the coordinator jobs of one
-- Oozie bundle, with each action's nominal time and missing_dependencies,
-- listed per application in nominal-time order.
SELECT
    a.job_id,
    j.app_name,
    a.nominal_time,
    a.missing_dependencies
FROM COORD_ACTIONS AS a
INNER JOIN COORD_JOBS AS j
    ON j.id = a.job_id
WHERE j.bundle_id = '0001994-130622061249977-oozie-oozi-B'
    AND a.status = 'WAITING'
ORDER BY
    j.app_name,
    a.nominal_time;
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Makes sure a _SUCCESS marker exists in every hourly directory (0100-2300)
# of DATE under each base path in PATHS. A marker is created with
# `hadoop fs -touchz` only when `hadoop fs -stat` cannot find one.
DATE="2013-06-19"
PATHS="/transform/sibyl/google/6291/Impression /input/sibyl/amp/6291/Impression"
for h in {1..23}; do
  # Zero-pad the loop counter into an HH00 directory name (1 -> 0100).
  # This must format $h: formatting $hour would feed the variable its own
  # previous value and produce 0000 on every iteration.
  hour=$(printf '%02d00' "$h")
  # PATHS is deliberately left unquoted so it splits into one path per word.
  for basepath in $PATHS; do
    path="${basepath}/${DATE}/${hour}"
    hadoop fs -stat "${path}/_SUCCESS" || hadoop fs -touchz "${path}/_SUCCESS"
  done
done
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Write job output compressed with the Snappy codec, compression type BLOCK.
SET mapreduce.output.fileoutputformat.compress true;
SET mapreduce.output.fileoutputformat.compress.codec org.apache.hadoop.io.compress.SnappyCodec;
SET mapreduce.output.fileoutputformat.compress.type BLOCK;

-- Read the tab-separated urlus logs for 2013-05-23 from both lax1 and nym1.
raw_lines = LOAD '/logs/{lax1,nym1}/2013-05-23/**/urlus/*.snappy'
    USING PigStorage('\t')
    AS (chararray,chararray,chararray,chararray,chararray,chararray);

-- Keep only the text after "]:" in the first field and strip the trailing
-- " UTC" from the second; the remaining four fields pass through renamed.
cleaned = FOREACH raw_lines GENERATE
    REGEX_EXTRACT($0,'.+\\]:(.*)$',1) AS hostname,
    REGEX_EXTRACT($1,'(.*) UTC$', 1) AS tstamp,
    $2 AS incoming_url,
    $3 AS segments,
    $4 AS to_date,
    $5 AS url_match;

-- Emit the result as tab-separated text.
STORE cleaned INTO '/tmp/urlus_for_netezza' USING PigStorage('\t');
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
chris@oj01-506:~$ hive -hiveconf hive.root.logger=ALL,console | |
13/05/16 18:01:51 WARN conf.Configuration: mapred.max.split.size is deprecated. Instead, use mapreduce.input.fileinputformat.split.maxsize | |
13/05/16 18:01:51 WARN conf.Configuration: mapred.min.split.size is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize | |
13/05/16 18:01:51 WARN conf.Configuration: mapred.min.split.size.per.rack is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize.per.rack | |
13/05/16 18:01:51 WARN conf.Configuration: mapred.min.split.size.per.node is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize.per.node | |
13/05/16 18:01:51 WARN conf.Configuration: mapred.reduce.tasks is deprecated. Instead, use mapreduce.job.reduces | |
13/05/16 18:01:51 WARN conf.Configuration: mapred.reduce.tasks.speculative.execution is deprecated. Instead, use mapreduce.reduce.speculative | |
13/05/16 18:01:51 WARN conf.Configuration: org.apache.hadoop.hive.conf.LoopingByteArrayInputStream@62acc57:an attem |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- External, hour-partitioned table read and written through the Avro SerDe.
-- No column list is declared here; the SerDe is pointed at the Avro schema
-- found at ${SCHEMAPATH}, which must be substituted before execution.
CREATE EXTERNAL TABLE IF NOT EXISTS pythia_profilemod
    PARTITIONED BY (
        year  INT,
        month INT,
        day   INT,
        hour  INT
    )
    ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
    WITH SERDEPROPERTIES (
        'avro.schema.url' = '${SCHEMAPATH}'
    )
    STORED AS
        INPUTFORMAT  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
        OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat';
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# | |
# Uses a keytab to authenticate to Kerberos and then creates an empty
# file/directory to notify Oozie that data is ready.
# Specify the name of the dataset to notify for as the first argument,
# and optionally the day in UTC during which the data is intended to be loaded
# (defaults to the current day in UTC).
# e.g. : | |
# |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
dc-ad-transaction-log-agent: syslogTcp(5141) | [value("dc", "dc") batch(20000, 15000) gzip ackedWriteAhead stubbornAppend insistentOpen < logicalSink("collector1") ? < logicalSink("collector20") ? logicalSink("collector19") > >, < nullDeco rpcSink("10.24.154.82",5141) ? nullDeco rpcSink("10.24.154.74",5141) >]; dc-ad-transaction-log-agent2: syslogTcp(5142) | [value("dc", "dc") batch(20000, 15000) gzip ackedWriteAhead stubbornAppend insistentOpen < logicalSink("collector11") ? < logicalSink("collector18") ? logicalSink("collector17") > >, < nullDeco rpcSink("10.24.154.74",5141) ? nullDeco rpcSink("10.24.154.78",5141) >]; dc-ad-transaction-log-agent3: syslogTcp(5143) | [value("dc", "dc") batch(20000, 15000) gzip ackedWriteAhead stubbornAppend insistentOpen < logicalSink("collector16") ? < logicalSink("collector15") ? logicalSink("collector14") > >, < nullDeco rpcSink("10.24.154.82",5142) ? nullDeco rpcSink("10.24.154.74",5142) >]; dc-ad-transaction-log-agent4: syslogTcp(5144) | [value("dc", "dc") batch(20000, 1 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.collective.hive; | |
import org.apache.hadoop.hive.ql.exec.Description; | |
import org.apache.hadoop.hive.ql.exec.UDFArgumentException; | |
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; | |
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; | |
import org.apache.hadoop.hive.ql.metadata.HiveException; | |
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; | |
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyLongObjectInspector; | |
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyPrimitiveObjectInspectorFactory; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# | |
# Uses a keytab to authenticate to Kerberos and then creates an empty
# file/directory to notify Oozie that data is ready.
# Specify the name of the dataset to notify for as the first argument,
# and optionally the day in UTC during which the data is intended to be loaded
# (defaults to the current day in UTC).
# e.g. : | |
# |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[ruby-1.9.3-p194] (master) | |
[02:19 PM] chris@apocalypse ~/src/collective/quasi/automati$ VBoxManage --version | |
4.1.23r80870 | |
[02:20 PM] chris@apocalypse ~/src/collective/quasi/automati$ vagrant --version | |
Vagrant version 0.9.0 | |
[ruby-1.9.3-p194] (master) | |
[02:13 PM] chris@apocalypse ~/src/collective/quasi$ git pull origin master | |
From github.com:collectivemedia/quasi | |
* branch master -> FETCH_HEAD |