Created
May 2, 2012 04:04
-
-
Save flodel/2573531 to your computer and use it in GitHub Desktop.
Basic Craigslist API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
image.gallery <- function(url, ncol = 3L) {
  ## Reformat the contents of a Craigslist search result URL into an image
  ## gallery (an HTML table, one ad per cell) and open it in the default
  ## browser.
  ##
  ## Inputs:
  ## - url: a Craigslist search URL as created by search.url
  ## - ncol: the number of columns for the output image gallery
  ##
  ## Output: NULL, invisibly. As a side effect, an HTML file is written and a
  ## browser is opened. When the page contains no ads, a message is emitted
  ## and no browser is opened.

  ## library() instead of require(): a missing package stops right here with
  ## a clear error instead of failing later with "could not find function".
  library(RCurl)
  library(stringr)
  library(R2HTML)

  scrap <- getURL(url)
  adds <- str_extract_all(scrap, '<p class="row">.*?</p>')[[1]]

  ## Without this guard the paste() calls below would turn zero ads into a
  ## single bogus cell (zero-length vectors are pasted as "").
  if (length(adds) == 0L) {
    message("no ads found at ", url)
    return(invisible(NULL))
  }

  pic.str <- str_extract(adds, 'id="images:.*?jpg"')
  pic.base <- str_replace(pic.str, 'id="images:(.*jpg)"', "\\1")
  pic.url <- paste("http://images.craigslist.org/", pic.base, sep = "")
  pic.tags <- paste('<img src="', pic.url, '" alt="', pic.url, '" ',
                    'style="width:100%"/>', sep = "")

  ## Ads without a picture have pic.str == NA; paste() would have turned that
  ## into a broken ".../NA" image, so such ads are shown as text only.
  adds <- ifelse(is.na(pic.str), adds, paste(pic.tags, '<BR>', adds))

  ## Pad with empty cells up to a multiple of ncol. Letting matrix() recycle
  ## (and silencing its warning) would repeat the first ads in the last row.
  n.pad <- (-length(adds)) %% ncol
  add.mat <- matrix(c(adds, rep("", n.pad)), ncol = ncol, byrow = TRUE)

  html <- HTMLInitFile()
  HTML(add.mat, innerBorder = 1, file = html)
  HTMLEndFile(html)
  browseURL(paste("file://", html, sep = ""))
  invisible(NULL)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.pick.category <- function() {
  ## Prompt the user with a menu for selecting a search category and return
  ## the corresponding 3-letter code used by Craigslist.
  ##
  ## Output: a length-one character vector holding the category code, or
  ## character(0) when the user exits the menu without choosing (menu()
  ## then returns 0).

  ## read.table(text = ...) instead of read.table(textConnection(...)): the
  ## latter leaves the text connection open. The table is parsed as
  ## "CODE = DESCRIPTION" pairs, with surrounding white space stripped.
  categories <- read.table(text = "
CODE = DESCRIPTION
sss = all for sale / wanted
ata = antiques
ppa = appliances
ara = arts+crafts
pta = auto parts
baa = baby+kids
bar = barter
haa = beauty+health
bia = bikes
boo = boats
bka = books
bfa = business
cta = cars+trucks
ema = cd/dvd/vhs
moa = cell phones
cla = clothing+accessories
cba = collectibles
sya = computers
ela = electronics
gra = farm+garden
zip = free stuff
fua = furniture
gms = garage sales
foa = general for sale
hsa = household
wan = items wanted
jwa = jewelry
maa = materials
mca = motorcycles
msa = musical instruments
pha = photo+video
rva = recreational vehicles
sga = sporting goods
tia = tickets
tla = tools
taa = toys+games
vga = video gaming", header = TRUE, sep = "=", strip.white = TRUE,
                           stringsAsFactors = FALSE)

  selected.idx <- menu(categories$DESCRIPTION, title = "pick a category:")
  categories$CODE[selected.idx]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
search.craigslist <- function(site.url = "http://atlanta.craigslist.org",
                              ncol = 3L) {
  ## Prompt the user with questions for searching Craigslist (search items,
  ## category, price range, etc.) and display the search results as an image
  ## gallery.
  ##
  ## Inputs:
  ## - site.url: Craigslist site URL
  ## - ncol: the number of columns for the output image gallery
  ##
  ## Output: whatever image.gallery returns. As a side effect, a browser is
  ## opened.

  ## Convert a price answer to an integer. A blank (ENTER) or non-numeric
  ## answer becomes integer(0), the "no limit" value search.url expects;
  ## a bare as.integer("") would give NA and end up as "minAsk=NA" in the URL.
  parse.price <- function(answer) {
    price <- suppressWarnings(as.integer(answer))
    if (length(price) != 1L || is.na(price)) integer(0) else price
  }

  query <- readline("search Craigslist for: ")
  category <- .pick.category()
  title.only <- readline("search titles only (press 1 or ENTER to skip): ")
  min.price <- readline("min price (press ENTER to skip): ")
  max.price <- readline("max price (press ENTER to skip): ")
  pic.only <- readline("pic only (press 1 or ENTER to skip): ")

  url <- search.url(query = query,
                    site.url = site.url,
                    category = category,
                    title.only = title.only == "1",
                    min.price = parse.price(min.price),
                    max.price = parse.price(max.price),
                    pic.only = pic.only == "1")
  image.gallery(url, ncol = ncol)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
search.url <- function(query,
                       site.url = "http://atlanta.craigslist.org",
                       category = "sss",
                       title.only = TRUE,
                       min.price = integer(0),
                       max.price = integer(0),
                       pic.only = TRUE) {
  ## Create a search URL on Craigslist.
  ##
  ## Inputs:
  ## - query: search string; spaces are replaced with "+", no other
  ##   encoding is applied
  ## - site.url: Craigslist site URL
  ## - category: a three-letter code for the category; some examples
  ##   "sss": all for sale/wanted, "zip": free stuff
  ## - title.only: boolean for restricting the search to ad titles
  ## - min.price: minimum price; integer(0) or NA for no minimum
  ## - max.price: maximum price; integer(0) or NA for no maximum
  ## - pic.only: boolean for restricting the search to ads with pics
  ##
  ## Output: a string representing a Craigslist search URL
  search <- list(query = gsub(" ", "+", query, fixed = TRUE),
                 catAbb = category,
                 srchType = if (isTRUE(title.only)) "T" else "A",
                 minAsk = as.integer(min.price),
                 maxAsk = as.integer(max.price),
                 hasPic = as.numeric(pic.only))

  ## Keep only the parameters that carry a value. Empty vectors AND missing
  ## values are dropped, otherwise e.g. as.integer("") would show up in the
  ## URL as "minAsk=NA".
  valid <- vapply(search,
                  function(value) length(value) > 0L && !any(is.na(value)),
                  logical(1))
  search.str <- paste(names(search)[valid], search[valid],
                      sep = "=", collapse = "&")
  paste(site.url, "/search/sss?", search.str, sep = "")
}
@aerovistae a little late, but it's actually in R xD
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Am I crazy, or is this actually implemented in R?