A personal diary of DataFrame munging over the years.
Convert a Series to a numeric dtype (will raise an error if the column has non-numeric values)
(h/t @makmanalp)
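A minimal sketch of that conversion, assuming a hypothetical DataFrame `df` with a string column 'amount':

import pandas as pd

df = pd.DataFrame({'amount': ['1', '2', '3.5']})  # hypothetical example data
# Raises a ValueError on non-numeric values by default; pass errors='coerce' to get NaN instead.
df['amount'] = pd.to_numeric(df['amount'])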
#!/usr/bin/env python
"""
Serialize/unserialize a class with a pandas data structure attribute using msgpack.
"""
import msgpack
import numpy as np
import pandas as pd
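The snippet stops after the imports; here is a minimal sketch of one way to round-trip such a class, assuming a hypothetical wrapper whose pandas attribute is a DataFrame (dtypes and the index are not preserved by this simple approach):

class Wrapper:
    """Hypothetical class holding a DataFrame attribute."""
    def __init__(self, df):
        self.df = df

    def to_msgpack(self):
        # to_dict(orient='list') yields plain Python lists that msgpack can pack.
        return msgpack.packb({'df': self.df.to_dict(orient='list')})

    @classmethod
    def from_msgpack(cls, blob):
        payload = msgpack.unpackb(blob, raw=False)
        return cls(pd.DataFrame(payload['df']))

w = Wrapper(pd.DataFrame({'x': [1, 2, 3]}))
restored = Wrapper.from_msgpack(w.to_msgpack())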
# When you're sure of the format, it's much quicker to explicitly convert your dates than to use `parse_dates`.
# Makes sense; was just surprised by the time difference.
import pandas as pd
from datetime import datetime
to_datetime = lambda d: datetime.strptime(d, '%m/%d/%Y %H:%M')
%time trips = pd.read_csv('data/divvy/Divvy_Trips_2013.csv', parse_dates=['starttime', 'stoptime'])
# CPU times: user 1min 29s, sys: 331 ms, total: 1min 29s
# Wall time: 1min 30s
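The explicit-conversion half of the comparison is missing above; presumably it reused the to_datetime lambda, for example via read_csv's converters argument (a sketch, not the original code):

# Explicit conversion with a known format skips pandas' slower format inference.
%time trips = pd.read_csv('data/divvy/Divvy_Trips_2013.csv', converters={'starttime': to_datetime, 'stoptime': to_datetime})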
require 'uri'

def get_proxy_url
  # Doesn't support different proxies for different protocols at present
  host_proxy = ENV['http_proxy'] || ENV['HTTP_PROXY'] || ENV['https_proxy'] || ENV['HTTPS_PROXY']
  if host_proxy
    uri = URI(host_proxy)
    if ['localhost', '127.0.0.1'].include? uri.host
      # 10.0.2.2 is the default vagrant gateway and should connect to the host OS.
      # Confirm this by running 'netstat -r' in the guest.
      host_proxy = host_proxy.sub(uri.host, '10.0.2.2')
    end
  end
  host_proxy
end
# Strip line endings and all spaces from test.txt, writing the result to minify1.txt;
# stop after the first line that contains 'str'.
with open('test.txt') as src, open('minify1.txt', 'w') as out:
    for line in src:
        out.write(line.rstrip('\r\n').replace(' ', ''))
        if 'str' in line:
            break
# Copyright 2015 Paul Brewer Economic and Financial Technology Consulting LLC
# Released under the MIT License
# LICENSE: http://opensource.org/licenses/MIT
# Purpose: removes inner commas and inner quotes from CSV file fields
# Useful because Google BigQuery (as of 2015-03) does not support quoted commas in CSV fields
# python ./unf
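The script body is cut off above; a minimal standard-library sketch of the idea, with placeholder file names:

import csv

# Read each row, delete commas and double quotes inside every field,
# and write the cleaned rows back out.
with open('input.csv', newline='') as src, open('cleaned.csv', 'w', newline='') as dst:
    writer = csv.writer(dst)
    for row in csv.reader(src):
        writer.writerow([field.replace(',', '').replace('"', '') for field in row])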
import pysal as ps
import pandas as pd

def dbf2DF(dbfile, upper=True):
    """Read a DBF file and return a pandas DataFrame.

    Arguments
    ---------
    dbfile : DBF file - input to be imported
    upper  : bool     - if True, make column headers upper case
    """
    db = ps.open(dbfile)  # legacy pysal (< 2.0) file I/O opens DBF files directly
    df = pd.DataFrame({col: db.by_col(col) for col in db.header})
    db.close()
    if upper:
        df.columns = [col.upper() for col in df.columns]
    return df
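Hypothetical usage, with a placeholder path:

df = dbf2DF('parcels.dbf')  # returns a DataFrame with upper-case column names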
#!/bin/sh
# Install node and npm via nvm - https://github.com/nvm-sh/nvm
# Run this script like - bash script-name.sh
# Define versions
INSTALL_NODE_VER=22
INSTALL_NVM_VER=0.40.1
# Download latest archlinux bootstrap package, see https://www.archlinux.org/download/
wget 'ftp://ftp.nluug.nl/pub/os/Linux/distr/archlinux/iso/latest/archlinux-bootstrap-*-x86_64.tar.gz'
# Make sure you'll have enough entropy for pacman-key later.
apt-get install haveged
# Install the arch bootstrap image in a tmpfs.
mount -t tmpfs none /mnt
cd /mnt
tar xvf ~/archlinux-bootstrap-*-x86_64.tar.gz --strip-components=1