Skip to content

Instantly share code, notes, and snippets.

View zachguo's full-sized avatar

Zach Guo zachguo

View GitHub Profile
@zachguo
zachguo / get_topic_features
Created July 22, 2014 22:18
Derive topic features from a text pandas series
import pandas as pd
from gensim import corpora, models
def get_topic_features(col):
"""Derive topic features from a text pandas series"""
# generate topics for corpora
colname = col.name
col = col.astype(str).apply(lambda x:x.split())
dictionary = corpora.Dictionary(col)
corpus = [dictionary.doc2bow(text) for text in col]
@zachguo
zachguo / print_cm.py
Last active May 31, 2022 17:39
Pretty print for sklearn confusion matrix
from sklearn.metrics import confusion_matrix
def print_cm(cm, labels, hide_zeroes=False, hide_diagonal=False, hide_threshold=None):
"""pretty print for confusion matrixes"""
columnwidth = max([len(x) for x in labels]+[5]) # 5 is value length
empty_cell = " " * columnwidth
# Print header
print " " + empty_cell,
for label in labels:
print "%{0}s".format(columnwidth) % label,
@zachguo
zachguo / OLS.py
Created March 2, 2014 06:34
Ordinary least squares (OLS) regression & print full results
import pandas
import statsmodels.api as sm
import numpy as np
def print_full(x):
    """Print ``x`` with pandas' row-truncation limit raised to ``len(x)``.

    ``display.max_rows`` is raised only for the duration of the print.
    ``pandas.option_context`` is used so that the caller's previous setting
    (not merely the pandas default) is restored afterwards, even if
    printing raises.

    Args:
        x: Object to print; must support ``len()`` (e.g. a DataFrame or
            Series).
    """
    # option_context restores the prior value on both normal exit and error,
    # unlike a trailing reset_option() which is skipped if print() fails and
    # discards any non-default value the caller had configured.
    with pandas.option_context('display.max_rows', len(x)):
        print(x)
# Load the input CSV into a DataFrame; the relative path is resolved against
# the current working directory, so run the script from the data's folder.
dataframe = pandas.read_csv("turnstile_data_master_with_weather.csv")
# "terminal-notifier" should be installed first
notify <- function(msg="Operation complete") {
in.osx <- (Sys.info()['sysname'] == "Darwin")
in.rstudio <- (Sys.getenv("RSTUDIO") == "1")
in.rgui <- (Sys.getenv("R_GUI_APP_REVISION") != "")
if (in.rstudio) { # hack to see if running in RStudio
title <- "RStudio"