Skip to content

Instantly share code, notes, and snippets.

@seahrh
seahrh / airflow_jinja_loop_dict.hql
Last active February 8, 2019 06:48
Loop dictionary in airflow jinja template
{# Partition lifecycle maintenance. params.lifecycle_days maps a table name (k)
   to its retention in days (v). ds_add(ds, v*-1) steps back v days from the
   execution date; ds_format converts that date from %Y-%m-%d to the %Y%m%d
   form used by the partition key, so partitions older than the retention
   window are dropped. #}
{% for k, v in params.lifecycle_days.items() %}
alter table {{ k }} drop if exists partition (ds<'{{ macros.ds_format(macros.ds_add(ds, v*-1), "%Y-%m-%d", "%Y%m%d") }}');
{% endfor %}
{# Recompute statistics on the current day's partition of each table. #}
{% for k, v in params.lifecycle_days.items() %}
analyze table {{ k }} partition (ds='{{ ds_nodash }}') compute statistics;
{% endfor %}
function retry(isDone, next) {
var current_trial = 0, max_retry = 50, interval = 10, is_timeout = false;
var id = window.setInterval(
function() {
if (isDone()) {
window.clearInterval(id);
next(is_timeout);
}
if (current_trial++ > max_retry) {
window.clearInterval(id);
language: scala
jdk: oraclejdk8
scala:
- "2.11.12"
# Use container-based infrastructure
sudo: false
@seahrh
seahrh / pg_terminate_backend.sql
Created October 26, 2018 16:48
pg see and close connections
-- Close every session connected to the 'analytics' database, except our own.
-- pg_terminate_backend(pid) asks the server to end that backend process.
SELECT pg_terminate_backend(pid)
FROM pg_stat_activity
WHERE datname = 'analytics'       -- only connections to this database
  AND pid <> pg_backend_pid()     -- never terminate the current session
;
@seahrh
seahrh / pg_disk_usage.sql
Created October 25, 2018 10:38
postgres queries to check disk usage
-- Postgres equivalent of DESCRIBE TABLE: each column's name, data type,
-- and (for character types) maximum length.
SELECT column_name, data_type, character_maximum_length
FROM information_schema.columns
WHERE table_name = 'my_table_name';
-- General Table Size Information, Performance Snippets
-- Disk usage, Works with PostgreSQL>=9.2
-- This will report size information for all tables, in both raw bytes and "pretty" form.
SELECT *, pg_size_pretty(total_bytes) AS total
@seahrh
seahrh / lambda.sh
Created September 26, 2018 08:39 — forked from wrwrwr/lambda.sh
Package a Python module with NumPy and SciPy for AWS Lambda.
#!/usr/bin/env bash
# Package a Python module (with NumPy and SciPy) for AWS Lambda deployment.
# Path to the project directory (that should include requirements.txt),
# Files and directories within that need to be deployed.
project=../backend
# NOTE(review): "lamdba_handler.py" looks like a typo for "lambda_handler.py" —
# confirm against the actual handler file name before changing it.
contents=(module lamdba_handler.py)
# Unnecessary parts. Note that there are some inter-dependencies in SciPy,
# for example to use scipy.stats you also need scipy.linalg, scipy.integrate,
# scipy.misc, scipy.sparse, and scipy.special.
@seahrh
seahrh / vgg_extract.py
Created September 5, 2018 06:36 — forked from hiwonjoon/vgg_extract.py
Feature Extraction and Write TF Record example
import numpy as np
import os
import tensorflow as tf
import vgg
import vgg_preprocessing
from pycocotools.coco import COCO
# Shorthand alias for TF-Slim (TF 1.x contrib API).
slim = tf.contrib.slim
# Output directory used by this feature-extraction script.
LOG_DIR = './log/fe'
@seahrh
seahrh / artifactory_build.sbt
Last active June 17, 2018 03:23
Publish to artifactory
// Publish artifacts to an Artifactory sbt repository.
val artifactoryUrl = "http://host:port/artifactory/sbt-dev"
resolvers += "Artifactory" at artifactoryUrl
// The publish URL carries a matrix parameter (;build.timestamp=<epoch millis>)
// so Artifactory records when the build was published. The string concatenation
// binds tighter than the `at` infix, so `at` receives the full URL + timestamp.
publishTo := Some("Artifactory Realm" at s"$artifactoryUrl;build.timestamp="
+ new java.util.Date().getTime)
// Artifactory credentials are read from a local properties file (kept out of VCS).
credentials += Credentials(new File("credentials.properties"))
@seahrh
seahrh / tf_model_size.py
Created June 8, 2018 06:08
Get model size after L1 regularization
# Get model size after L1 regularization
# see: https://colab.research.google.com/notebooks/mlcc/sparsity_and_l1_regularization.ipynb?utm_source=mlcc&utm_campaign=colab-external&utm_medium=referral&utm_content=l1regularization-colab&hl=en#scrollTo=e6GfTI0CFhB8
def model_size(estimator):
variables = estimator.get_variable_names()
size = 0
for variable in variables:
if not any(x in variable
for x in ['global_step',
'centered_bias_weight',
# Shuffle dataframe
# NOTE(review): DataFrame.reindex returns a new frame and the result is not
# assigned — in a plain script this line is a no-op. In a notebook the
# expression is rendered as cell output, which may be the intent; if a
# persistent shuffle is wanted, assign: cities = cities.reindex(...). Confirm.
cities.reindex(np.random.permutation(cities.index))
# Read data from Google Cloud Storage
california_housing_dataframe = pd.read_csv("https://storage.googleapis.com/mledu-datasets/california_housing_train.csv", sep=",")
# Convert pandas data into a dict of np arrays
# where `key` is column name.