A personal diary of DataFrame munging over the years.
Convert a Series to a numeric dtype (will raise an error if the column has non-numeric values)
(h/t @makmanalp)
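A minimal sketch of that conversion, assuming a hypothetical DataFrame `df` with a string column 'amount':

import pandas as pd

df = pd.DataFrame({'amount': ['1', '2', '3.5']})  # hypothetical example data
# Raises a ValueError on non-numeric values by default; pass errors='coerce' to get NaN instead.
df['amount'] = pd.to_numeric(df['amount'])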
#!/usr/bin/env python
"""
Serialize/unserialize a class with a pandas data structure attribute using msgpack.
"""
import msgpack
import numpy as np
import pandas as pd
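The snippet stops after the imports; here is a minimal sketch of one way to round-trip such a class, assuming a hypothetical wrapper whose pandas attribute is a DataFrame (dtypes and the index are not preserved by this simple approach):

class Wrapper:
    """Hypothetical class holding a DataFrame attribute."""
    def __init__(self, df):
        self.df = df

    def to_msgpack(self):
        # to_dict(orient='list') yields plain Python lists that msgpack can pack.
        return msgpack.packb({'df': self.df.to_dict(orient='list')})

    @classmethod
    def from_msgpack(cls, blob):
        payload = msgpack.unpackb(blob, raw=False)
        return cls(pd.DataFrame(payload['df']))

w = Wrapper(pd.DataFrame({'x': [1, 2, 3]}))
restored = Wrapper.from_msgpack(w.to_msgpack())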
# When you're sure of the format, it's much quicker to explicitly convert your dates than to use `parse_dates`.
# Makes sense; was just surprised by the time difference.
import pandas as pd
from datetime import datetime
to_datetime = lambda d: datetime.strptime(d, '%m/%d/%Y %H:%M')
%time trips = pd.read_csv('data/divvy/Divvy_Trips_2013.csv', parse_dates=['starttime', 'stoptime'])
# CPU times: user 1min 29s, sys: 331 ms, total: 1min 29s
# Wall time: 1min 30s
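The explicit-conversion half of the comparison is missing above; presumably it reused the to_datetime lambda, for example via read_csv's converters argument (a sketch, not the original code):

# Explicit conversion with a known format skips pandas' slower format inference.
%time trips = pd.read_csv('data/divvy/Divvy_Trips_2013.csv', converters={'starttime': to_datetime, 'stoptime': to_datetime})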
require 'uri'

def get_proxy_url
  # Doesn't support different proxies for different protocols at present
  host_proxy = ENV['http_proxy'] || ENV['HTTP_PROXY'] || ENV['https_proxy'] || ENV['HTTPS_PROXY']
  if host_proxy
    uri = URI(host_proxy)
    if ['localhost', '127.0.0.1'].include? uri.host
      # 10.0.2.2 is the default vagrant gateway and should connect to the host OS.
      # Confirm this by running 'netstat -r' in the guest.
      host_proxy = host_proxy.sub(uri.host, '10.0.2.2')
    end
  end
  host_proxy
end
# Strip line endings and all spaces from test.txt, writing the result to minify1.txt;
# stop after the first line that contains 'str'.
with open('test.txt') as src, open('minify1.txt', 'w') as out:
    for line in src:
        out.write(line.rstrip('\r\n').replace(' ', ''))
        if 'str' in line:
            break
# Copyright 2015 Paul Brewer Economic and Financial Technology Consulting LLC
# Released under the MIT License
# LICENSE: http://opensource.org/licenses/MIT
# Purpose: removes inner commas and inner quotes from CSV file fields
# Useful because Google BigQuery (as of 2015-03) does not support quoted commas in CSV fields
# python ./unf
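The script body is cut off above; a minimal standard-library sketch of the idea, with placeholder file names:

import csv

# Read each row, delete commas and double quotes inside every field,
# and write the cleaned rows back out.
with open('input.csv', newline='') as src, open('cleaned.csv', 'w', newline='') as dst:
    writer = csv.writer(dst)
    for row in csv.reader(src):
        writer.writerow([field.replace(',', '').replace('"', '') for field in row])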
import pysal as ps
import pandas as pd

def dbf2DF(dbfile, upper=True):
    """Read a DBF file and return a pandas DataFrame.

    Arguments
    ---------
    dbfile : DBF file - input to be imported
    upper  : bool     - if True, make column headers upper case
    """
    db = ps.open(dbfile)  # legacy pysal (< 2.0) file I/O opens DBF files directly
    df = pd.DataFrame({col: db.by_col(col) for col in db.header})
    db.close()
    if upper:
        df.columns = [col.upper() for col in df.columns]
    return df
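Hypothetical usage, with a placeholder path:

df = dbf2DF('parcels.dbf')  # returns a DataFrame with upper-case column names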
#!/bin/sh
# Install node and npm via nvm - https://github.com/nvm-sh/nvm
# Run this script like - bash script-name.sh
# Define versions
INSTALL_NODE_VER=22
INSTALL_NVM_VER=0.40.1
# Download latest archlinux bootstrap package, see https://www.archlinux.org/download/
wget 'ftp://ftp.nluug.nl/pub/os/Linux/distr/archlinux/iso/latest/archlinux-bootstrap-*-x86_64.tar.gz'
# Make sure you'll have enough entropy for pacman-key later.
apt-get install haveged
# Install the arch bootstrap image in a tmpfs.
mount -t tmpfs none /mnt
cd /mnt
tar xvf ~/archlinux-bootstrap-*-x86_64.tar.gz --strip-components=1