Skip to content

Instantly share code, notes, and snippets.

@jovianlin
jovianlin / seaborn_color_scheme.py
Created April 2, 2017 15:51
seaborn nice color scheme
%matplotlib inline
import seaborn as sns
sns.set_style("whitegrid")
sns.set(font_scale=1.5)
@jovianlin
jovianlin / custom_concat_columns.py
Created March 30, 2017 09:17
Custom Concat Columns for PySpark
from pyspark.sql.functions import col, concat, lit
# Source columns that make up the composite value, in output order.
key_fields = ('appName', 'platform', 'carrier', 'connectionType',
              'country', 'city', 'userAgent')
# Interleave the columns with a literal '|' separator:
# col, '|', col, '|', ..., col (no leading or trailing separator).
custom_concat = [col(key_fields[0])]
for field_name in key_fields[1:]:
    custom_concat += [lit('|'), col(field_name)]
# Add a new column entitled "custom_col" holding the concatenated pieces.
union_df = union_df.withColumn('custom_col', concat(*custom_concat))
@jovianlin
jovianlin / test_graphframes.py
Created March 27, 2017 15:14
test_graphframes.py
# Vertex DataFrame: one row per vertex, keyed by the unique "id" column.
vertex_columns = ["id", "name", "age"]
vertex_rows = [
    ("a", "Alice", 34),
    ("b", "Bob", 36),
    ("c", "Charlie", 30),
]
v = sqlContext.createDataFrame(vertex_rows, vertex_columns)
# Create an Edge DataFrame with "src" and "dst" columns
e = sqlContext.createDataFrame([
("a", "b", "friend"),
("b", "c", "follow"),
@jovianlin
jovianlin / add_subl.sh
Last active March 25, 2017 16:53
Opening Sublime Text on command line as "subl" on Mac OS
# References:
# https://gist.github.com/adrianorsouza/df4759b0583dcd112da4
# http://olivierlacan.com/posts/launch-sublime-text-3-from-the-command-line/
#
# Each command below symlinks the `subl` helper shipped inside
# "Sublime Text.app" under the name `subl`; they differ only in the target
# directory (presumably alternatives -- pick one; TODO confirm).
# Option 1: link into /usr/bin (run via sudo).
sudo ln -s /Applications/Sublime\ Text.app/Contents/SharedSupport/bin/subl /usr/bin/subl
# Option 2: link into /usr/local/bin (note: "local", not /usr/bin).
ln -s "/Applications/Sublime Text.app/Contents/SharedSupport/bin/subl" /usr/local/bin/subl
@jovianlin
jovianlin / jupyter_cosmetics.py
Created March 9, 2017 01:54
ipython-jupyter notebook cosmetics
# Widen the notebook by overriding the `.container` CSS width to 98%.
# `display` and `HTML` come from the public `IPython.display` module;
# importing `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:98% !important; }</style>"))
# Set pandas display options: 'display.max_columns' to 50 and
# 'display.max_colwidth' to 200.
import pandas as pd
pd.options.display.max_columns = 50
pd.options.display.max_colwidth = 200
@jovianlin
jovianlin / my_standardscalar.py
Created December 24, 2016 08:02
StandardScaler implementation for standardizing data before model-training
def normalize(train, test):
    """Standardize ``train`` and ``test`` using statistics from ``train`` only.

    Both inputs must support ``.mean()``, ``.std()`` and element-wise
    arithmetic. The inputs are not modified; new objects are returned.

    Args:
        train: Training data; its mean and standard deviation define the
            transformation.
        test: Held-out data, transformed with the *training* statistics.

    Returns:
        A ``(train, test)`` tuple of the standardized data.
    """
    # Both statistics must come from the training set: using test.std()
    # would leak test information and mis-scale the training data.
    mean, std = train.mean(), train.std()
    train = (train - mean) / std
    test = (test - mean) / std
    return train, test
@jovianlin
jovianlin / plot_decision_boundary.py
Created December 22, 2016 14:32
Plot Decision Boundary
# Fit a LogisticRegressionCV classifier on (X, y).
clf = sklearn.linear_model.LogisticRegressionCV()
clf.fit(X, y)
# Plot the decision boundary; the fitted model's bound `predict` method is
# passed directly as the prediction function.
plot_decision_boundary(clf.predict)
plt.title("Logistic Regression")
@jovianlin
jovianlin / clustering_cosine_similarity_matrix.py
Last active December 21, 2020 07:53
Clustering cosine similarity matrix
"""
### Problem Statement ###
Let's say you have a square matrix which consists of cosine similarities (values between 0 and 1).
This square matrix can be of any size.
You want to get clusters which maximize the values between elements in the cluster.
For example, for the following matrix:
| A | B | C | D
A | 1.0 | 0.1 | 0.6 | 0.4
B | 0.1 | 1.0 | 0.1 | 0.2
@jovianlin
jovianlin / fix_encoding.py
Created December 8, 2016 07:44
Quickfix for encoding errors
def fix_encoding(some_str):
    """Return ``some_str`` with every non-printable-ASCII character removed.

    Only characters in the printable ASCII range, space (0x20) through
    tilde (0x7E), are kept. Control characters (tab, newline, DEL, ...) and
    all non-ASCII characters are dropped.

    Args:
        some_str: The text to clean.

    Returns:
        A new string containing only the printable ASCII characters of
        ``some_str``, in their original order.
    """
    # The upper bound is 0x7E ('~'), the last printable ASCII character.
    # A bound of 0x78 ('x') would wrongly strip 'y', 'z', '{', '|', '}', '~'.
    return ''.join(c for c in some_str if 0x20 <= ord(c) <= 0x7E)
@jovianlin
jovianlin / anaconda_tensorflow.txt
Last active December 7, 2016 13:54
Anaconda & Tensorflow
# ===================================================================================
# Many thanks to:
# https://uoa-eresearch.github.io/eresearch-cookbook/recipe/2014/11/20/conda/
#
# More info:
# https://www.continuum.io/blog/developer-blog/python-packages-and-environments-conda
# https://conda-forge.github.io/#about
# ===================================================================================
# conda info --env