This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def feature_selection(X_train, y_train, X_test):
    """Reduce train/test matrices to the features kept by an L1 logistic model.

    An L1-penalised ``LogisticRegression`` (``C=0.1``) is fitted on the
    training data; columns whose coefficients the penalty drives to
    (effectively) zero are dropped from both matrices.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels used to fit the selector.
        X_test: Test feature matrix with the same columns as ``X_train``.

    Returns:
        A tuple ``(X_train, X_test, feature_selector)`` holding the reduced
        training matrix, the reduced test matrix, and the fitted
        ``LogisticRegression`` (its ``coef_`` identifies the kept columns).
    """
    # Local import: the file's top-level import block is not visible here.
    # Estimators stopped acting as transformers in scikit-learn 0.19, so the
    # column selection has to go through SelectFromModel.
    from sklearn.feature_selection import SelectFromModel

    # liblinear is named explicitly because the current default solver
    # (lbfgs) rejects penalty="l1"; it was the implicit default originally.
    feature_selector = LogisticRegression(
        C=0.1, penalty="l1", dual=False, solver="liblinear"
    )
    feature_selector.fit(X_train, y_train)

    # prefit=True reuses the model fitted above; for an L1-penalised
    # estimator SelectFromModel's default threshold is 1e-5.
    reducer = SelectFromModel(feature_selector, prefit=True)
    X_train = reducer.transform(X_train)
    X_test = reducer.transform(X_test)
    return X_train, X_test, feature_selector
def get_support(feature_selector):
    """Return the indices of features that have a non-zero coefficient.

    Args:
        feature_selector: A fitted model exposing a ``coef_`` array. Only the
            last axis (the feature axis) is considered, so a feature counts
            as supported if any row holds a non-zero weight for it.

    Returns:
        A list of unique feature indices as plain ``int`` values, sorted in
        ascending order.
    """
    # np.where yields one index array per axis; the last one holds the
    # feature (column) positions.
    nonzero_columns = np.where(feature_selector.coef_ != 0)[-1]
    # np.unique both de-duplicates and sorts, so the result is deterministic
    # (iteration order of a set is not), and tolist() yields native ints.
    return np.unique(nonzero_columns).tolist()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<head> | |
<title>d3test</title> | |
</head> | |
<body> | |
<h3>Meteor JS: Reactive D3 Force Layout Graph (minimum example)</h3> | |
<h4>Add a few nodes and links first, or the visualization won't show up.</h4> | |
<span>New Node</span> | |
<form id="newnode"> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from gensim import corpora, models | |
def get_topic_features(col): | |
"""Derive topic features from a text pandas series""" | |
# generate topics for corpora | |
colname = col.name | |
col = col.astype(str).apply(lambda x:x.split()) | |
dictionary = corpora.Dictionary(col) | |
corpus = [dictionary.doc2bow(text) for text in col] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# internet_rate is "Households with no internet use in and outside the home in 2010", gathered from http://www.ntia.doc.gov/files/ntia/data/CPS2010Tables/t11_2.txt | |
STATES = {"AL":{"full_name":"ALABAMA", | |
"geo_region":"South", | |
"internet_rate":25.82}, | |
"AK":{"full_name":"ALASKA", | |
"geo_region":"West", | |
"internet_rate":11.36}, | |
"AS":{"full_name":"AMERICAN SAMOA", | |
"geo_region":"Outer", | |
"internet_rate":None}, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.metrics import confusion_matrix | |
def print_cm(cm, labels, hide_zeroes=False, hide_diagonal=False, hide_threshold=None): | |
"""pretty print for confusion matrixes""" | |
columnwidth = max([len(x) for x in labels]+[5]) # 5 is value length | |
empty_cell = " " * columnwidth | |
# Print header | |
print " " + empty_cell, | |
for label in labels: | |
print "%{0}s".format(columnwidth) % label, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas | |
import statsmodels.api as sm | |
import numpy as np | |
def print_full(x):
    """Print a pandas object without row truncation.

    ``display.max_rows`` is raised to ``len(x)`` only for the duration of the
    print. The caller's previous setting is restored afterwards, even if
    printing raises, rather than being reset to the library default.

    Args:
        x: Any sized object pandas can render (e.g. a DataFrame or Series).
    """
    with pandas.option_context('display.max_rows', len(x)):
        print(x)
# Load turnstile_data_master_with_weather.csv into a DataFrame; the bare
# filename means the file must be in the current working directory.
dataframe = pandas.read_csv("turnstile_data_master_with_weather.csv")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
a | |
a's | |
a’s | |
able | |
about | |
above | |
according | |
accordingly | |
across | |
actually |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# "terminal-notifier" should be installed first | |
notify <- function(msg="Operation complete") { | |
in.osx <- (Sys.info()['sysname'] == "Darwin") | |
in.rstudio <- (Sys.getenv("RSTUDIO") == "1") | |
in.rgui <- (Sys.getenv("R_GUI_APP_REVISION") != "") | |
if (in.rstudio) { # hack to see if running in RStudio | |
title <- "RStudio" |