Skip to content

Instantly share code, notes, and snippets.

@alaiacano
Last active December 26, 2015 04:39
Show Gist options
  • Save alaiacano/7094681 to your computer and use it in GitHub Desktop.
Save alaiacano/7094681 to your computer and use it in GitHub Desktop.
def average_active_per_day():
    """Return the average number of distinct active users per day.

    Builds a Hive query over ``activity_log`` covering rows whose ``dt`` is
    on or after the date 30 days before today, counts distinct ``user_id``
    values per ``day``, and averages those daily counts.

    Returns:
        Whatever ``hive.execute_query`` returns for the query (its exact
        shape is defined by that client, which is not visible here).
    """
    last_month = (datetime.date.today() - datetime.timedelta(30)).strftime("%Y-%m-%d")
    # The date is wrapped in quotes so it is compared as a date string; left
    # bare, `2013-09-22` would be evaluated as the integer 2013 - 9 - 22.
    hive_query = """
    SELECT AVG(unique_users)
    FROM (SELECT day,
                 COUNT(DISTINCT user_id) AS unique_users
          FROM activity_log
          WHERE dt >= '%s'
          GROUP BY day) x
    """ % last_month
    # The original passed an undefined name (`query`) here.
    return hive.execute_query(hive_query)
def average_active_per_day():
    """Return the 30-day average of daily unique users, computed incrementally.

    Steps, in order:
      1. Sum the ``unique_users`` values stored in the MySQL table
         ``unique_users_per_day`` for dates from 30 days ago (inclusive) up
         to yesterday (exclusive).
      2. Ask Hive for yesterday's value.
      3. Insert yesterday's value into the MySQL table.
      4. Return ``(stored sum + yesterday's value) / 30.0``.

    Returns:
        float: the combined total divided by 30.0. This assumes both
        ``execute_query`` calls return plain numbers -- TODO confirm against
        the ``mysql`` / ``hive`` client wrappers, which are not visible here.
    """
    # Take "today" once so both window bounds derive from the same date even
    # if the call straddles midnight.
    today = datetime.date.today()
    last_month = (today - datetime.timedelta(30)).strftime("%Y-%m-%d")
    yesterday = (today - datetime.timedelta(1)).strftime("%Y-%m-%d")

    # Dates are quoted so they are compared as date strings; unquoted,
    # `2013-09-22` would be evaluated as integer subtraction.
    mysql_query = """
    SELECT SUM(unique_users)
    FROM unique_users_per_day
    WHERE dt >= '%s'
    AND dt < '%s'
    """ % (last_month, yesterday)

    # NOTE(review): this reads the summary table `unique_users_per_day` from
    # Hive; presumably it was meant to compute yesterday's distinct users
    # from the raw activity log -- confirm the intended source table.
    hive_query = """
    SELECT SUM(unique_users)
    FROM unique_users_per_day
    WHERE dt = '%s'
    """ % yesterday

    already_calculated = mysql.execute_query(mysql_query)
    new_data = hive.execute_query(hive_query)

    # Store yesterday's value so later runs can read it from MySQL.
    update_query = """
    INSERT INTO unique_users_per_day
    (dt,
    unique_users)
    VALUES ("%s",
    "%d")
    """ % (yesterday, new_data)
    mysql.execute_query(update_query)

    return (already_calculated + new_data) / 30.0
/**
 * Text-line source for blog records, read from the given paths.
 *
 * Each input line is split on the U+0001 control character into exactly three
 * columns, emitted as the fields `'id`, `'title` and `'createdOn`. A line that
 * does not split into exactly three columns is emitted as `(None, None, None)`.
 *
 * @param p one or more input paths, forwarded to `MultipleTextLineFiles`
 */
case class BlogsTable(override val p: String*) extends MultipleTextLineFiles(p: _*) {

  /** Maps the raw `'line` field of `pipe` to `('id, 'title, 'createdOn)`. */
  override def transformForRead(pipe: Pipe) = {
    RichPipe(pipe).mapTo('line -> ('id, 'title, 'createdOn)) {
      record: String =>
        // "\u0001" is the same character as the octal escape "\1", which newer
        // Scala compilers no longer accept. The -1 limit keeps trailing empty
        // columns so the size check below sees them.
        val cols = record.split("\u0001", -1)
        if (cols.size != 3) {
          (None, None, None) // Didn't get the expected input!
        } else {
          // Each field comes from its own column; previously all three were
          // read from cols(0).
          val id = strToInt(cols(0))
          val title = cols(1)
          val createdOn = tsStringToDate(cols(2))
          (id, title, createdOn)
        }
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment