Skip to content

Instantly share code, notes, and snippets.

# Instead of generating every possible value of GRE and GPA, sample an
# evenly spaced range of 10 values from each column's min to its max.
gres = np.linspace(data['gre'].min(), data['gre'].max(), 10)
# A parenthesised single-argument print is valid both as the Python 2
# print statement and as the Python 3 print function.
print(gres)
# array([ 220. , 284.44444444, 348.88888889, 413.33333333,
# 477.77777778, 542.22222222, 606.66666667, 671.11111111,
# 735.55555556, 800. ])
gpas = np.linspace(data['gpa'].min(), data['gpa'].max(), 10)
print(gpas)
# array([ 2.26 , 2.45333333, 2.64666667, 2.84 , 3.03333333,
import numpy as np
def cartesian(arrays, out=None):
"""
Generate a cartesian product of input arrays.
Parameters
----------
arrays : list of array-like
1-D arrays to form the cartesian product of.
def isolate_and_plot(variable):
# isolate gre and class rank
grouped = pd.pivot_table(combos, values=['admit_pred'], index=[variable, 'prestige'],
aggfunc=np.mean)
# in case you're curious as to what this looks like
# print grouped.head()
# admit_pred
# gre prestige
# 220.000000 1 0.282462
import pandas as pd
import statsmodels.api as sm
import pylab as pl
import numpy as np
def cartesian(arrays, out=None):
"""
Generate a cartesian product of input arrays.
@glamp
glamp / nltk_yhat_javascript.js
Last active December 15, 2015 03:09
showing how to use nltk w/ yhat
$ = require('jquery');
data = {
"data": "this is the kind of movie that makes one appreciate disney ' s live - action george of the jungle . tarzan and the lost city , the latest attempt to bring edgar rice burrough ' s legendary hero to the big screen , is one of the most inept and ill - timed of any tarzan adventure so far . badly conceived and poorly executed , tarzan and the lost city appears headed for a quick trip to video store shelves . i have no idea why the producers chose now to bring back tarzan ; it ' s not as if there are legions of new fans clamoring for his next movie . furthermore , it ' s even more curious that this film is rated pg , which , by definition , rules out any sex or explicit violence . and , while i ' m not advocating the excesses embraced by the 1981 bo derek version of the story , tarzan deserves a slightly more adult approach than the one used in the embarrassing production . of course , no tweaking of the content to change the rating could have saved tarzan and the los
# Load binary.csv from the UCLA stats site (requires network access).
data <- read.csv("http://www.ats.ucla.edu/stat/data/binary.csv")
# Preview the first rows of the data frame.
head(data)
# Treat rank as a categorical factor instead of a numeric value.
data$rank <- factor(data$rank)
# Fit a binomial GLM of admit on gre, gpa and rank.
fit <- glm(admit ~ gre + gpa + rank, data=data, family="binomial")
# Print the model summary; it echoes the glm() call exactly as written above.
summary(fit)
# Call:
# glm(formula = admit ~ gre + gpa + rank, family = "binomial",
# data = data)
#
-- MongoDB or other databases: build one findOne() shell command per row of foo
SELECT
    'db.foo.findOne({_id: ObjectId("' || _id || '")})'
FROM
    foo;
-- e.g. db.foo.findOne({_id: ObjectId("5066fa4abd8ad53408a4869c")})
--web based (great for spidering)
select
'http://www.yelp.com/search?find_loc=Manhattan%2C+NY&ns=1&find_desc=' || name as url
from
import psycopg2
import requests
# "{YOUR CONNECTION}" is a placeholder: replace it with a real connection
# string before running.
conn = psycopg2.connect("{YOUR CONNECTION}")
# Cursor used to execute queries on this connection.
cur = conn.cursor()
q = """
select
'http://www.yelp.com/search?find_loc=Manhattan%2C+NY&ns=1&find_desc=' || name as url
-- Truncate the same date to the start of its year, its month and its day.
SELECT
    date_trunc('year', '2013-04-05'::date) AS year,
    date_trunc('month', '2013-04-05'::date) AS month,
    date_trunc('day', '2013-04-05'::date) AS day;
-- Sample output (the UTC offsets shown presumably reflect the session time zone):
-- year | month | day
--------------------------+------------------------+------------------------
-- 2013-01-01 00:00:00-05 | 2013-04-01 00:00:00-04 | 2013-04-05 00:00:00-04
select
to_char('2013-04-05'::date, 'YYYY') as year
, to_char('2013-04-05'::date, 'MM-YYYY') as month
--median (http://wiki.postgresql.org/wiki/Aggregate_Median)
CREATE OR REPLACE FUNCTION _final_median(anyarray)
RETURNS float8 AS
$$
WITH q AS
(
SELECT val
FROM unnest($1) val
WHERE VAL IS NOT NULL
ORDER BY 1