# Convert an Excel sheet to CSV
in2csv file1.xls > file1.csv
# Convert a fixed-width file to CSV, using a CSV schema describing the columns
in2csv -f fixed -s schema.csv data.fixed > data.csv
# Keep only the rows whose phone_number matches the regex
csvgrep -c phone_number -r "\d{3}-123-\d{4}" data.csv > matching.csv
import pandas as pd
from github import Github

# Note: username/password auth is deprecated by GitHub; a personal
# access token can be passed as the single argument instead.
g = Github("username", "password")
# Collect the URL and name of every repo the user has starred
final = ({'url': r.html_url, 'name': r.name} for r in g.get_user().get_starred())
pd.DataFrame(final).to_excel('Github Stars 20160101.xlsx')  # needs an Excel writer, e.g. openpyxl
# Does each name in `vars` start with `match`? (case-insensitive by default)
starts_with <- function(vars, match, ignore.case = TRUE) {
  if (ignore.case) match <- tolower(match)
  n <- nchar(match)
  if (ignore.case) vars <- tolower(vars)
  substr(vars, 1, n) == match
}
# Presumably mirrors starts_with(): does each name in `vars` end with `match`?
ends_with <- function(vars, match, ignore.case = TRUE) {
  if (ignore.case) match <- tolower(match)
  n <- nchar(match)
  if (ignore.case) vars <- tolower(vars)
  len <- nchar(vars)
  substr(vars, pmax(1, len - n + 1), len) == match
}
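# A quick sanity check of both helpers with hypothetical variable names:
vars <- c("Sepal.Length", "Petal.Width")
starts_with(vars, "petal")  # FALSE  TRUE
ends_with(vars, "width")    # FALSE  TRUE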
# A list of records with inconsistent fields (dput() output)
mylist <- list(structure(list(Hit = "True", Project = "Blue", Year = "2011",
    Rating = "4", Launch = "26 Jan 2012", ID = "19", Dept = "1, 2, 4"), .Names = c("Hit",
    "Project", "Year", "Rating", "Launch", "ID", "Dept")), structure(list(
    Hit = "False", Error = "Record not found"), .Names = c("Hit",
    "Error")), structure(list(Hit = "True", Project = "Green", Year = "2004",
    Rating = "8", Launch = "29 Feb 2004", ID = "183", Dept = "6, 8"), .Names = c("Hit",
    "Project", "Year", "Rating", "Launch", "ID", "Dept")))

# Convert each record to a one-row data frame
dfs <- lapply(mylist, data.frame, stringsAsFactors = FALSE)
library(dplyr)
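# The snippet stops after loading dplyr; presumably the records are then
# stacked into a single data frame. bind_rows() does this and fills fields
# a record lacks (e.g. Error) with NA.
bind_rows(dfs)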
# Min-max scaling: rescale x linearly to the interval [a, b].
#   x: numeric vector of values to be scaled
#   a: desired minimum after scaling takes place
#   b: desired maximum after scaling takes place
# e.g. minmax_scaler(c(1, 2, 3, 4), 1, 17)
# [1]  1.000000  6.333333 11.666667 17.000000
minmax_scaler <- function(x, a, b) {
  (((b - a) * (x - min(x))) / (max(x) - min(x))) + a
}
def select(dataframe, columns, keep_others=True):
    '''Re-order or select columns: if keep_others, move `columns` to the
    front and keep the rest; otherwise return only `columns`.'''
    if keep_others:
        # Preserve the frame's original column order for the leftovers;
        # a plain set difference would return them in arbitrary order.
        others = [c for c in dataframe.columns if c not in set(columns)]
        return dataframe[columns + others]
    else:
        return dataframe[columns]
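# A minimal usage sketch with a hypothetical three-column frame:
import pandas as pd
df = pd.DataFrame({'a': [1], 'b': [2], 'c': [3]})
select(df, ['c']).columns.tolist()                     # ['c', 'a', 'b']
select(df, ['c'], keep_others=False).columns.tolist()  # ['c']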
def search_item(dataframe, name, query, na=False, case=False, regex=True):
    '''Return the rows whose `name` column matches any term in `query`.'''
    idx = pd.Series([False] * len(dataframe), index=dataframe.index)
    # For each term in the query, mark the rows whose text contains it
    for q in query:
        idx |= dataframe[name].str.contains(q, na=na, case=case, regex=regex)
    return dataframe[idx]
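# A hedged usage sketch with hypothetical data; the None row is dropped
# because na=False treats missing text as a non-match:
import pandas as pd
df = pd.DataFrame({'title': ['red apple', 'green pear', None]})
search_item(df, 'title', ['apple', 'pear'])  # returns the first two rows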
--- | |
title: 'Going deeper with dplyr: New features in 0.3 and 0.4'
output: html_document
---

## Introduction

In August 2014, I created a [40-minute video tutorial](https://www.youtube.com/watch?v=jWjqLW-u3hc) introducing the key functionality of the dplyr package in R, using dplyr version 0.2. Since then, there have been two significant updates to dplyr (0.3 and 0.4), introducing a ton of new features.

This document (created in March 2015) covers the most useful new features in 0.3 and 0.4, as well as other functionality that I didn't cover last time (though it is not necessarily new). My [new video tutorial](https://www.youtube.com/watch?v=2mh1PqfsXVI) walks through the code below in detail.
--- | |
title: "Introduction to dplyr for Faster Data Manipulation in R" | |
output: html_document | |
--- | |
Note: There is a 40-minute [video tutorial](https://www.youtube.com/watch?v=jWjqLW-u3hc) on YouTube that walks through this document in detail. | |
## Why do I use dplyr? | |
* Great for data exploration and transformation |