Skip to content

Instantly share code, notes, and snippets.

View ghego's full-sized avatar

Francesco Mosconi ghego

View GitHub Profile
# Query template that counts the rows of a single table in the BREATHE dataset;
# {table_name} is filled in once per table below.
line_count_query = """
SELECT COUNT(*) FROM BREATHE.{table_name}
"""

# NOTE(review): `tables`, `client` and `lines_counts` are not defined in this
# snippet. `lines_counts` must be a list at this point (it is appended to); it
# is rebound to a DataFrame at the end, so re-initialize it before re-running.
for table in tables:
    # A bare COUNT(*) yields a single cell, read from position [0, 0].
    res = client.query(line_count_query.format(table_name=table)).to_dataframe().values[0, 0]
    lines_counts.append((table, res))

# Tabulate the collected (table, row count) pairs.
lines_counts = pd.DataFrame(lines_counts, columns=['table', 'rows'])
@ghego
ghego / talks_links.md
Last active November 14, 2019 17:59
All the links for talks
@ghego
ghego / keras_model.py
Last active May 29, 2018 00:14
keras_model.py
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
model = Sequential()
model.add(Dense(10, input_dim=X_train.shape[1], activation='sigmoid'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer=Adam(lr=0.01),
loss='binary_crossentropy',
def nn(X, y, hidden_dim=10, learning_rate=0.01, epochs=100, debug=False):
input_dim = X.shape[1]
output_dim = y.shape[1]
# Make our model
model = dict(
w0 = np.random.randn(input_dim, hidden_dim),
w1 = np.random.randn(hidden_dim, output_dim)
)
@ghego
ghego / data.py
Last active February 21, 2018 18:52
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
np.random.seed(1)
# train comes from the Titanic dataset provided by
# kaggle (https://www.kaggle.com/c/titanic/data)
data = pd.read_csv('./train.csv')
# Preprocess data
#! $HOME/anaconda/bin/python
# swap $HOME with your absolute path if this doesn't work
import sys
import io
import os
from nbformat import read, write, current_nbformat
from argparse import ArgumentParser
from glob import glob
from shutil import rmtree
@ghego
ghego / remove_output.py
Last active August 8, 2017 19:27 — forked from damianavila/remove_output.py
Remove output from IPython notebook from the command line (dev version 1.0)
"""
Usage: python remove_output.py notebook.ipynb [-o]
"""
import sys
import io
import os
from nbformat import read, write, current_nbformat
from argparse import ArgumentParser
def remove_outputs(nb):
## Some global parameters that you can change
# Spark release number; substituted into the release name and download URL.
VERSION=2.3.0
# Base name of the release (the archive name without .tgz, per URL below).
SPARK=spark-$VERSION-bin-hadoop2.7
# Parent directory under which the release directory is placed.
INSTALL_PATH=$HOME/spark
# Location of the release tarball on the mirror.
URL=http://mirrors.ocf.berkeley.edu/apache/spark/spark-$VERSION/$SPARK.tgz
# NOTE(review): presumably the profile file this script updates; the code
# that uses it is not visible in this excerpt.
BASH_PROFILE=$HOME/.bash_profile
# Derived paths: the release directory under INSTALL_PATH and its bin/ folder.
SPARK_PATH=$INSTALL_PATH/$SPARK
BIN_PATH=$SPARK_PATH/bin
@ghego
ghego / reformat json file to multiple lines
Last active August 29, 2015 14:19
Reformat a JSON file to multiple lines
# Split the contents of in.json across multiple lines with three sed passes,
# writing the result to out.json:
#   1. insert a newline after every "},"
#   2. insert a newline after every "["
#   3. insert a newline between a "}" and the "]" that directly follows it
# The \'$'\n fragment closes the single-quoted sed script right after a
# backslash and continues with ANSI-C quoting ($'...'), so sed receives a
# backslash followed by a literal newline in the replacement text. This needs
# a shell that supports $'...' (e.g. bash).
# NOTE(review): in the third pattern `\}` is presumably meant as a literal
# closing brace; confirm that the sed in use accepts it.
cat in.json | sed 's/\(},\)/\1\'$'\n/g' | sed 's/\(\[\)/\1\'$'\n/g' | sed 's/\(\}\)\]/\1\'$'\n]/g' > out.json
@ghego
ghego / upgrade-all-pip
Created March 31, 2015 17:20
Upgrade all Python packages installed with pip (that are not under conda supervision)
# Based on http://stackoverflow.com/questions/2720014/upgrading-all-packages-with-pip
# Pipeline: take the output of `conda list`, keep only the lines containing
# "<pip>", cut out the first space-separated field (presumably the package
# name), and run `pip install -U` once per name (xargs -n1).
# NOTE(review): relies on `conda list` tagging pip-installed packages with
# "<pip>"; verify this against the output of the conda version in use.
conda list | grep '<pip>' | cut -d ' ' -f 1 | xargs -n1 pip install -U