#Non-mathematical Introductions
- http://gcn.com/articles/2014/01/09/topographical-data-analysis.aspx
- https://www.simonsfoundation.org/quanta/20131004-the-mathematical-shape-of-things-to-come/
#Videos
| def consumer(func): | |
| ''' | |
| Decorator taking care of initial next() call to "sending" generators | |
| From PEP-342 | |
| http://www.python.org/dev/peps/pep-0342/ | |
| ''' | |
| def wrapper(*args,**kw): | |
| gen = func(*args, **kw) | |
| next(gen) |
#Non-mathematical Introductions
#Videos
| import numpy as np | |
| import marisa_trie | |
| from sklearn.feature_extraction.text import CountVectorizer | |
| from sklearn.externals import six | |
| class MarisaCountVectorizer(CountVectorizer): | |
| # ``CountVectorizer.fit`` method calls ``fit_transform`` so | |
| # ``fit`` is not provided | |
| def fit_transform(self, raw_documents, y=None): |
A personal diary of DataFrame munging over the years.
Convert Series datatype to numeric (will error if column has non-numeric values)
(h/t @makmanalp)
A checklist for designing and developing internet scale services, inspired by James Hamilton's 2007 paper "On Designing and Deploying Internet-Scale Services."
| "full_name";"party";"official_post";"constituency";"twitter_handle";"twitter_user_id";"uri";"last_updated";"notes" | |
| "Ms Diane Abbott MP";"Labour";;"Hackney North and Stoke Newington";"https://twitter.com/HackneyAbbott";153810216;"http://dbpedia.org/resource/Diane_Abbott";"2014-10-18T10:04:00+01:00"; | |
| "Debbie Abrahams MP";"Labour";;"Oldham East and Saddleworth";"https://twitter.com/Debbie_abrahams";225857392;"http://dbpedia.org/resource/Debbie_Abrahams";"2014-10-18T10:04:00+01:00"; | |
| "Nigel Adams MP";"Conservative";;"Selby and Ainsty";"TWITTER_UNKNOWN";-1;"http://dbpedia.org/resource/Nigel_Adams";"2014-10-18T10:04:00+01:00"; | |
| "Adam Afriyie MP";"Conservative";;"Windsor";"https://twitter.com/AdamAfriyie";22031058;"http://dbpedia.org/resource/Adam_Afriyie";"2014-10-18T10:04:00+01:00"; | |
| "Rt Hon Bob Ainsworth MP";"Labour";;"Coventry North East";"TWITTER_UNKNOWN";-1;"http://dbpedia.org/resource/Bob_Ainsworth";"2014-10-18T10:04:00+01:00"; | |
| "Peter Aldous MP";"Conservative";;"Waveney";"https://twitter.com/peter_aldous";255998 |
| #Plot data using ggplot2 | |
| library(ggplot2) | |
| #Calculate points crossing UCL or LCL | |
| pageviews_w_forecast$outliers <- | |
| ifelse(pageviews_w_forecast$pageviews > pageviews_w_forecast$upperBound.pageviews, pageviews_w_forecast$pageviews, | |
| ifelse(pageviews_w_forecast$pageviews < pageviews_w_forecast$lowerBound.pageviews, pageviews_w_forecast$pageviews, NA)) | |
| #Add LCL and UCL labels | |
| LCL <- vector(mode = "character", nrow(pageviews_w_forecast)) |
Headers: Accept: application/json, text/plain, */* Referer: https://yougov.co.uk/profiler
| import numpy as np | |
| import pandas as pd | |
| from lxml import html | |
| from sklearn import metrics | |
| from sklearn.cross_validation import train_test_split | |
| from sklearn.linear_model import LogisticRegression as LR | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
def clean(text):
    """Return the text content of an HTML string, lowercased and stripped.

    ``text`` is parsed with ``html.fromstring``; the resulting element's
    ``text_content()`` is lowercased and leading/trailing whitespace is
    removed.
    """
    parsed = html.fromstring(text)
    plain = parsed.text_content()
    return plain.lower().strip()
# Generate 100-dimensional random-walk data: every point is the previous
# point plus small Gaussian noise, so consecutive points in the sequence
# are similar to each other.
import numpy as np

# Starting position: 100 independent draws from N(0, 0.1).
last = np.random.normal(0, .1, 100)
for i in range(1000):
    # Take one step from the previous point by adding N(0, 0.1) noise
    # to each of the 100 dimensions.
    new = last + np.random.normal(0, .1, 100)
    last = new
    # Emit one point per line as space-separated values. The call form of
    # print with a single argument works on both Python 2 and Python 3,
    # whereas the original print statement is a SyntaxError on Python 3.
    print(' '.join(str(x) for x in new))