Skip to content

Instantly share code, notes, and snippets.

@seahrh
seahrh / airflow_jinja_loop_dict.hql
Last active February 8, 2019 06:48
Loop dictionary in airflow jinja template
{# Partition lifecycle maintenance. params.lifecycle_days maps a table name (k)
   to its retention in days (v). ds_add(ds, v*-1) steps back v days from the
   execution date; ds_format converts that date from %Y-%m-%d to the %Y%m%d
   form used by the partition key, so partitions older than the retention
   window are dropped. #}
{% for k, v in params.lifecycle_days.items() %}
alter table {{ k }} drop if exists partition (ds<'{{ macros.ds_format(macros.ds_add(ds, v*-1), "%Y-%m-%d", "%Y%m%d") }}');
{% endfor %}
{# Recompute statistics on the current day's partition of each table. #}
{% for k, v in params.lifecycle_days.items() %}
analyze table {{ k }} partition (ds='{{ ds_nodash }}') compute statistics;
{% endfor %}
function retry(isDone, next) {
var current_trial = 0, max_retry = 50, interval = 10, is_timeout = false;
var id = window.setInterval(
function() {
if (isDone()) {
window.clearInterval(id);
next(is_timeout);
}
if (current_trial++ > max_retry) {
window.clearInterval(id);
language: scala
jdk: oraclejdk8
scala:
- "2.11.12"
# Use container-based infrastructure
sudo: false
@seahrh
seahrh / pg_terminate_backend.sql
Created October 26, 2018 16:48
pg see and close connections
-- Close every session connected to the 'analytics' database, except our own.
-- pg_terminate_backend(pid) asks the server to end that backend process.
SELECT pg_terminate_backend(pid)
FROM pg_stat_activity
WHERE datname = 'analytics'       -- only connections to this database
  AND pid <> pg_backend_pid()     -- never terminate the current session
;
@seahrh
seahrh / pg_disk_usage.sql
Created October 25, 2018 10:38
postgres queries to check disk usage
-- Postgres equivalent of DESCRIBE TABLE: each column's name, data type,
-- and (for character types) maximum length.
SELECT column_name, data_type, character_maximum_length
FROM information_schema.columns
WHERE table_name = 'my_table_name';
-- General Table Size Information, Performance Snippets
-- Disk usage, Works with PostgreSQL>=9.2
-- This will report size information for all tables, in both raw bytes and "pretty" form.
SELECT *, pg_size_pretty(total_bytes) AS total
@seahrh
seahrh / lambda.sh
Created September 26, 2018 08:39 — forked from wrwrwr/lambda.sh
Package a Python module with NumPy and SciPy for AWS Lambda.
#!/usr/bin/env bash
# Package a Python module (with NumPy and SciPy) for AWS Lambda deployment.
# Path to the project directory (that should include requirements.txt),
# Files and directories within that need to be deployed.
project=../backend
# NOTE(review): "lamdba_handler.py" looks like a typo for "lambda_handler.py" —
# confirm against the actual handler file name before changing it.
contents=(module lamdba_handler.py)
# Unnecessary parts. Note that there are some inter-dependencies in SciPy,
# for example to use scipy.stats you also need scipy.linalg, scipy.integrate,
# scipy.misc, scipy.sparse, and scipy.special.
@seahrh
seahrh / vgg_extract.py
Created September 5, 2018 06:36 — forked from hiwonjoon/vgg_extract.py
Feature Extraction and Write TF Record example
import numpy as np
import os
import tensorflow as tf
import vgg
import vgg_preprocessing
from pycocotools.coco import COCO
# Shorthand alias for TF-Slim (TF 1.x contrib API).
slim = tf.contrib.slim
# Output directory used by this feature-extraction script.
LOG_DIR = './log/fe'
@seahrh
seahrh / artifactory_build.sbt
Last active June 17, 2018 03:23
Publish to artifactory
// Publish artifacts to an Artifactory sbt repository.
val artifactoryUrl = "http://host:port/artifactory/sbt-dev"
resolvers += "Artifactory" at artifactoryUrl
// The publish URL carries a matrix parameter (;build.timestamp=<epoch millis>)
// so Artifactory records when the build was published. The string concatenation
// binds tighter than the `at` infix, so `at` receives the full URL + timestamp.
publishTo := Some("Artifactory Realm" at s"$artifactoryUrl;build.timestamp="
+ new java.util.Date().getTime)
// Artifactory credentials are read from a local properties file (kept out of VCS).
credentials += Credentials(new File("credentials.properties"))
@seahrh
seahrh / tf_model_size.py
Created June 8, 2018 06:08
Get model size after L1 regularization
# Get model size after L1 regularization
# see: https://colab.research.google.com/notebooks/mlcc/sparsity_and_l1_regularization.ipynb?utm_source=mlcc&utm_campaign=colab-external&utm_medium=referral&utm_content=l1regularization-colab&hl=en#scrollTo=e6GfTI0CFhB8
def model_size(estimator):
variables = estimator.get_variable_names()
size = 0
for variable in variables:
if not any(x in variable
for x in ['global_step',
'centered_bias_weight',
# Shuffle dataframe
# NOTE(review): DataFrame.reindex returns a new frame and the result is not
# assigned — in a plain script this line is a no-op. In a notebook the
# expression is rendered as cell output, which may be the intent; if a
# persistent shuffle is wanted, assign: cities = cities.reindex(...). Confirm.
cities.reindex(np.random.permutation(cities.index))
# Read data from Google Cloud Storage
california_housing_dataframe = pd.read_csv("https://storage.googleapis.com/mledu-datasets/california_housing_train.csv", sep=",")
# Convert pandas data into a dict of np arrays
# where `key` is column name.