Skip to content

Instantly share code, notes, and snippets.

@justgrimes
justgrimes / gist:3308085
Created August 9, 2012 21:07
text mining in r snippet
# Text-mining preprocessing pipeline built on the tm package.
# Reads every file in a directory into a corpus, normalises the text
# (encoding, punctuation, digits, case, stop words, whitespace) and stems it.
# The cleaned corpus is left in `a`.

# library() stops with an error when tm is not installed; require() would
# only return FALSE and let the script fail later with a confusing message.
library(tm)

# NOTE(review): the corpus language is declared as "lat" while the stop-word
# list and the stemmer below are English -- confirm which one is intended.
a <- Corpus(
  DirSource("C:/Users/jgrimes/Desktop/text/"),
  readerControl = list(language = "lat")
)
# summary(a)

# Plain character functions (the iconv wrapper, tolower) must be wrapped in
# content_transformer() so tm_map() keeps returning text documents rather
# than bare character vectors (required by tm >= 0.6).
a <- tm_map(
  a,
  content_transformer(function(x) iconv(enc2utf8(x), sub = "byte"))
)
a <- tm_map(a, removePunctuation)
a <- tm_map(a, removeNumbers)
# Lower-case before stop-word removal: the stop-word list is lower-case.
a <- tm_map(a, content_transformer(tolower))
a <- tm_map(a, removeWords, stopwords("english"))
# Collapse whitespace only after words have been removed, otherwise the gaps
# left behind by the removed stop words stay in the text.
a <- tm_map(a, stripWhitespace)
a <- tm_map(a, stemDocument, language = "english")
@justgrimes
justgrimes / latlong_distance_sql_snippet
Created April 17, 2012 17:01
SQL snippet for calculating distance from latitude and longitude
-- Returns latitude, longitude and a computed distance from the fixed
-- reference point (40.7383040, -73.99319) for every row within 1.5 distance
-- units of that point.
-- The distance expression is the spherical law of cosines:
--   R * ACOS(sin(lat1)*sin(lat2) + cos(lat1)*cos(lat2)*cos(lon2 - lon1))
-- PI() * x / 180 converts degrees to radians.
-- NOTE(review): 3963.191 is presumably the Earth's radius in miles, which
-- would make the distance (and the 1.5 cut-off) miles -- confirm.
-- NOTE(review): assumes the latitude/longitude columns hold degrees -- confirm.
SELECT latitude,longitude, (3963.191 * ACOS(
(SIN(PI()* 40.7383040 /180)*SIN(PI()*latitude/180)) +
(COS(PI()* 40.7383040 /180)*cos(PI()*latitude/180)*COS(PI() * longitude/180-PI()* -73.99319 /180))
)) AS distance
FROM TABLE
-- 1=1 is always true; every real filter below can then start with AND.
WHERE 1=1
-- Same distance expression as in the SELECT list, repeated for the filter.
AND 3963.191 * ACOS( (SIN(PI()* 40.7383040 /180)*SIN(PI() * latitude/180)) +
(COS(PI()* 40.7383040 /180)*cos(PI()*latitude/180)*COS(PI() * longitude/180-PI()* -73.99319 /180))
) <= 1.5
-- NOTE(review): the ORDER BY expression is cut off in this snippet; it begins
-- with the same 3963.191 * ACOS( prefix as the distance expression above.
ORDER BY 3963.191 * ACOS(
@justgrimes
justgrimes / latlongmatch.py
Created April 16, 2012 17:52
Match two sets of latitude/longitude points by distance
# Script that attempts to match lat/long points from two CSV files by distance
#justin grimes (@justgrimes) 04/16/12
import math
import csv
import sys
import re
# takes two lat/long points and returns distance, modified code from -> http://www.johndcook.com/python_longitude_latitude.html
def distance_on_unit_sphere(lat1, long1, lat2, long2):
@justgrimes
justgrimes / gist:2348459
Created April 10, 2012 05:16
publiclibraries.com scraper
# publiclibraries.com scraper
import BeautifulSoup
import urllib2
l = [("http://www.publiclibraries.com/alabama.htm","AL"),
("http://www.publiclibraries.com/alaska.htm","AK"),
("http://www.publiclibraries.com/arizona.htm","AZ"),
("http://www.publiclibraries.com/arkansas.htm","AR"),
("http://www.publiclibraries.com/california.htm","CA"),