Skip to content

Instantly share code, notes, and snippets.

@benmarwick
Last active April 29, 2023 21:40
Show Gist options
  • Select an option

  • Save benmarwick/8800dbf288b2a30c7612bbc5c164ee10 to your computer and use it in GitHub Desktop.

Select an option

Save benmarwick/8800dbf288b2a30c7612bbc5c164ee10 to your computer and use it in GitHub Desktop.
Get violin auction prices from a few websites and plot
library(dplyr)
library(ggplot2)
library(purrr)
library(readr)
library(rvest)
library(stringr)
library(tibble)
# Scrape auction prices from the pages linked on the maestronet makers list
url <- "https://www.maestronet.com/history/makers_list.cfm?ID=1"

# href attribute of every link matched by ".text a" on the index page
links_on_page <-
  url %>%
  read_html() %>%
  html_nodes(".text a") %>%
  html_attr("href")

# turn each href into a full URL under /history/
pages_from_links <-
  str_glue('https://www.maestronet.com/history/{links_on_page}')

# safely() captures a failed download/parse as an `error` element instead of
# aborting the whole run
safely_read_html <-
  safely(read_html)

get_pages <-
  map(pages_from_links, safely_read_html)

# keep only the successfully read pages (a failed read has a NULL result,
# which compact() discards)
get_pages_results <-
  map(get_pages, "result") %>%
  compact()

# text of every node matching "td td td td" on each page
get_pages_results_tables <-
  map(get_pages_results, function(page) {
    page %>%
      html_nodes("td td td td") %>%
      html_text()
  })

# Keep the cell texts that contain a "$" followed by another character and
# convert them to numbers. Texts that parse_number() cannot convert become NA;
# they are dropped here so the median() calls used for plotting do not
# return NA (same guard as the Tarisio section of this script).
get_pages_results_prices <-
  map(get_pages_results_tables, ~ str_subset(.x, "\\$.")) %>%
  unlist() %>%
  parse_number() %>%
  enframe() %>%
  dplyr::filter(!is.na(value))
# Histogram of the www.maestronet.com auction prices on a log10 dollar axis;
# the median is marked with a red vertical line and a text label
ggplot(
  data = get_pages_results_prices,
  mapping = aes(x = value)
) +
  geom_histogram() +
  geom_vline(
    xintercept = median(get_pages_results_prices$value),
    size = 2,
    colour = "red"
  ) +
  annotate(
    geom = "text",
    label = str_glue("Median price is ${median(get_pages_results_prices$value)}"),
    # label is placed at a fixed offset to the right of the median line
    x = median(get_pages_results_prices$value) + 17000,
    y = 250,
    size = 6
  ) +
  theme_minimal(base_size = 16) +
  scale_x_log10(labels = scales::dollar) +
  labs(
    x = str_glue('Auction prices of {nrow(get_pages_results_prices)} violins\n data from https://www.maestronet.com/')
  )
# Second source: the auction results page at
# https://tarisio.com/auctions/auction-results/
url_r <- "https://tarisio.com/auctions/auction-results/"

tarisio_com_auctions <- read_html(url_r)

# Take the first table on the page, keep its price* columns, stack them into
# one long column, parse the text to numbers and drop anything unparseable
tarisio_com_auctions_prices <-
  html_table(tarisio_com_auctions, fill = TRUE)[[1]] %>%
  janitor::clean_names() %>%
  select(starts_with("price")) %>%
  stack() %>%
  mutate(value = parse_number(values)) %>%
  filter(!is.na(value))
# Histogram of the tarisio.com auction prices on a log10 dollar axis;
# the median is marked with a red vertical line and a text label
ggplot(
  data = tarisio_com_auctions_prices,
  mapping = aes(x = value)
) +
  geom_histogram() +
  geom_vline(
    xintercept = median(tarisio_com_auctions_prices$value),
    size = 2,
    colour = "red"
  ) +
  annotate(
    geom = "text",
    label = str_glue("Median price is ${median(tarisio_com_auctions_prices$value)}"),
    # label is placed at a fixed offset to the right of the median line
    x = median(tarisio_com_auctions_prices$value) + 10000,
    y = 25,
    size = 6
  ) +
  theme_minimal(base_size = 16) +
  scale_x_log10(labels = scales::dollar) +
  labs(
    x = str_glue('Auction prices of {nrow(tarisio_com_auctions_prices)} violins\n data from https://tarisio.com/auctions')
  )
# Third source: Christie's lot-finder search results for "violin"
the_page <- "https://www.christies.com/lotfinder/searchresults.aspx?pid=temporarysoldlotsbanner&searchtype=p&action=paging&entry=violin&pg=all&sid=3666efe5-47bd-4a4a-a5b9-6eb10ea5e008"
# CSS selector for the nodes whose text is searched for prices
the_node <- ".accept-cookies-button , h3 , .price"

christies_auctions <-
  the_page %>%
  read_html() %>%
  html_nodes(the_node)

# Keep the node texts that mention both "Price" and "USD" and convert them to
# numbers. Texts that parse_number() cannot convert become NA; they are
# dropped here so the median() calls used for plotting do not return NA
# (same guard as the Tarisio section of this script).
christies_auctions_usd <-
  christies_auctions %>%
  html_text() %>%
  str_subset("Price") %>%
  str_subset("USD") %>%
  parse_number() %>%
  enframe() %>%
  dplyr::filter(!is.na(value))
# Histogram of the Christie's auction prices (USD) on a log10 dollar axis;
# the median is marked with a red vertical line and a text label
ggplot(
  data = christies_auctions_usd,
  mapping = aes(x = value)
) +
  geom_histogram() +
  geom_vline(
    xintercept = median(christies_auctions_usd$value),
    size = 2,
    colour = "red"
  ) +
  annotate(
    geom = "text",
    label = str_glue("Median price is ${median(christies_auctions_usd$value)}"),
    # label is placed at a fixed offset to the right of the median line
    x = median(christies_auctions_usd$value) + 15000,
    y = 60,
    size = 6
  ) +
  theme_minimal(base_size = 16) +
  scale_x_log10(labels = scales::dollar) +
  labs(
    x = str_glue('Auction prices of {nrow(christies_auctions_usd)} violins\n data from https://www.christies.com')
  )
@benmarwick
Copy link
Author

benmarwick commented Nov 10, 2018

image

image

Includes some bows and other things:
image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment