expersso · August 29, 2015 14:22 · ClintWeathers · Jun 12, 2015
diff --git a/ldply_example b/ldply_example
 # Scrape one table from the DistroWatch index page for each "dataspan"
 # value and stack the per-page results into a single data frame.
 #
 # Using ldply returns a clean data frame, so avoids
 # the lapply + do.call(rbind, ...) idiom
 # Also, the .progress argument is very useful
 #
 # NOTE(review): relies on plyr (ldply), rvest (read_html, html_nodes,
 # html_table), dplyr (mutate, as_tibble), stringr (str_replace_all) and the
 # %>% pipe being attached elsewhere -- confirm against the rest of the file.

 df <- ldply(c(2002:2014, 52, 26, 13, 4, 1), function(x) {
   sprintf("http://distrowatch.com/index.php?dataspan=%d", x) %>%
     # read_html() replaces html(), which rvest deprecated and later removed.
     read_html() %>%
     html_nodes(
       xpath = "//table[@class = 'News' and @style = 'direction: ltr'][2]"
     ) %>%
     .[[1]] %>%
     html_table() %>%
     # Drop the first two rows and the first column (presumably header rows
     # and the rank column -- verify against the live page).
     .[-(1:2), -1] %>%
     setNames(c("distro", "npd")) %>%
     mutate(
       # Strip every thousands separator, not only the first one, so values
       # with more than one comma still parse as numbers.
       npd = npd %>% str_replace_all(",", "") %>% as.numeric(),
       # Record which dataspan value this page was requested with.
       time = x
     ) %>%
     # as_tibble() replaces the deprecated tbl_df().
     as_tibble()
 }, .progress = "text")

 head(df)
 #     distro npd time
 # 1 Mandrake 473 2002
 # 2  Red Hat 453 2002
 # 3   Gentoo 326 2002
 # 4   Debian 311 2002
 # 5 Sorcerer 253 2002
 # 6     SuSE 250 2002
	# Scrape one table from the DistroWatch index page for each "dataspan"
	# value and stack the per-page results into a single data frame.
	#
	# Using ldply returns a clean data frame, so avoids
	# the lapply + do.call(rbind, ...) idiom
	# Also, the .progress argument is very useful
	#
	# NOTE(review): relies on plyr (ldply), rvest (read_html, html_nodes,
	# html_table), dplyr (mutate, as_tibble), stringr (str_replace_all) and
	# the %>% pipe being attached elsewhere -- confirm against the rest of
	# the file.

	df <- ldply(c(2002:2014, 52, 26, 13, 4, 1), function(x) {
	  sprintf("http://distrowatch.com/index.php?dataspan=%d", x) %>%
	    # read_html() replaces html(), which rvest deprecated and later
	    # removed.
	    read_html() %>%
	    html_nodes(
	      xpath = "//table[@class = 'News' and @style = 'direction: ltr'][2]"
	    ) %>%
	    .[[1]] %>%
	    html_table() %>%
	    # Drop the first two rows and the first column (presumably header
	    # rows and the rank column -- verify against the live page).
	    .[-(1:2), -1] %>%
	    setNames(c("distro", "npd")) %>%
	    mutate(
	      # Strip every thousands separator, not only the first one, so
	      # values with more than one comma still parse as numbers.
	      npd = npd %>% str_replace_all(",", "") %>% as.numeric(),
	      # Record which dataspan value this page was requested with.
	      time = x
	    ) %>%
	    # as_tibble() replaces the deprecated tbl_df().
	    as_tibble()
	}, .progress = "text")

	head(df)
	#     distro npd time
	# 1 Mandrake 473 2002
	# 2  Red Hat 453 2002
	# 3   Gentoo 326 2002
	# 4   Debian 311 2002
	# 5 Sorcerer 253 2002
	# 6     SuSE 250 2002