luisDVA · November 8, 2017 16:26
diff --git a/ggpupBV.R b/ggpupBV.R
 # define function
 # this code is just for fun and I do not have rights to the images on dogtime.com I just like dogs

 ggpupBV <- function(ggplotObject,breedVector){
  
  # required packages 
  require(dplyr)
  require(jpeg)
  require(grid)
  require(gridExtra)
  require(RCurl)
  require(rvest)
  require(stringi)
  require(extrafont)
  require(fuzzyjoin)
  
  # scrape a list of image URLS from the dogtime breed profiles homepage  
  
  if (!exists("imgurls")){
    # read page source
    dogIndex <- read_html("http://dogtime.com/dog-breeds/profiles")
    # keep node of interest (identified using Selectorgadget)
    scrapedHtml <- dogIndex %>% html_nodes(".horizontal-group-listing") %>% toString()
    
    # match image urls
    ## Find everything that starts with "http"
    ### No white spaces allowed
    ### Ends with jpg 
    #### Note: this regex is for PC
    imgurlslist <- stri_match_all_regex(scrapedHtml,"(http[^\\s]+(jpg)\\b)")
    # subset into character vector
    imgurls <- imgurlslist[[1]][,1]
  }
  
 # convert list to df
  imgurlsDF <- data.frame(imgurls)
 # clean up urls (gradually)
  imgurlsDF$breedname <- stri_extract_all_regex(imgurlsDF$imgurls,"(?<=\\_).*?(?=\\.)")
  imgurlsDF$breedname <- stri_extract_all_regex(imgurlsDF$breedname,"(?<=\\_).*?(?=300)")
  imgurlsDF$breedname <- stri_replace_all_fixed(imgurlsDF$breedname,pattern = "-"," ")
  imgurlsDF$breedname <- trimws(imgurlsDF$breedname)
  imgurlsDF$breedname <- stri_replace_all_fixed(imgurlsDF$breedname,"dog breed","")
  imgurlsDF$breedname <- stri_replace_all_fixed(imgurlsDF$breedname,"what is a","")
  imgurlsDF$breedname <- stri_replace_all_fixed(imgurlsDF$breedname,"what is the","")
  
  #match provided breed vector with URLS
  imgurlsDF <- imgurlsDF %>% filter(!is.na(breedname))
  breedVector <- data.frame(breeds=breedVector)
  joinedMatches <- stringdist_left_join(breedVector,imgurlsDF, by=c("breeds"="breedname"),max_dist=3)
  joinedMatches$imgurls <- as.character(joinedMatches$imgurls)
  joinedMatches <- joinedMatches %>% filter(!is.na(imgurls))
  
  # scrape two dog breed photos 
  urlInd <- sample(nrow(joinedMatches),2,rep=F)
  # for upper right corner
  dogImg.URLU <- joinedMatches$imgurls[urlInd[1]]
  # save as a raster object
  dogImgU <- rasterGrob(readJPEG(getURLContent(dogImg.URLU)))
  # label for grob
  dogImgUlab <- textGrob(paste(stri_trans_totitle(joinedMatches$breedname[urlInd[1]])))
  # for lower right corner
  dogImg.URLL <- joinedMatches$imgurls[urlInd[2]]
  # save as a raster object
  dogImgL <- rasterGrob(readJPEG(getURLContent(dogImg.URLL)))
   # label for grob
  dogImgLlab <- textGrob(paste(stri_trans_totitle(joinedMatches$breedname[urlInd[2]])))
    
  # graphical parameters
  # define plot layout 
  lay <- rbind(c(1,1,2),
               c(1,1,3),
               c(1,1,4),
               c(1,1,5))
  
  # set up some attribution text
  # the fontfamily parameter is optional, erase or change to something else to match the fonts available for your system
  rightText=textGrob("images from www.dogtime.com", rot=90, gp=gpar(fontfamily = "Roboto Condensed Light"))
  
  # arrange the plot and the image side by side
  grid.arrange(ggplotObject, dogImgU, dogImgUlab, dogImgL, dogImgLlab, layout_matrix=lay,widths=c(2,1,1),right=rightText)
  
 }
	# define function
	# this code is just for fun and I do not have rights to the images on dogtime.com I just like dogs

	ggpupBV <- function(ggplotObject,breedVector){

	# required packages
	require(dplyr)
	require(jpeg)
	require(grid)
	require(gridExtra)
	require(RCurl)
	require(rvest)
	require(stringi)
	require(extrafont)
	require(fuzzyjoin)

	# scrape a list of image URLS from the dogtime breed profiles homepage

	if (!exists("imgurls")){
	# read page source
	dogIndex <- read_html("http://dogtime.com/dog-breeds/profiles")
	# keep node of interest (identified using Selectorgadget)
	scrapedHtml <- dogIndex %>% html_nodes(".horizontal-group-listing") %>% toString()

	# match image urls
	## Find everything that starts with "http"
	### No white spaces allowed
	### Ends with jpg
	#### Note: this regex is for PC
	imgurlslist <- stri_match_all_regex(scrapedHtml,"(http[^\\s]+(jpg)\\b)")
	# subset into character vector
	imgurls <- imgurlslist[[1]][,1]
	}

	# convert list to df
	imgurlsDF <- data.frame(imgurls)
	# clean up urls (gradually)
	imgurlsDF$breedname <- stri_extract_all_regex(imgurlsDF$imgurls,"(?<=\\_).*?(?=\\.)")
	imgurlsDF$breedname <- stri_extract_all_regex(imgurlsDF$breedname,"(?<=\\_).*?(?=300)")
	imgurlsDF$breedname <- stri_replace_all_fixed(imgurlsDF$breedname,pattern = "-"," ")
	imgurlsDF$breedname <- trimws(imgurlsDF$breedname)
	imgurlsDF$breedname <- stri_replace_all_fixed(imgurlsDF$breedname,"dog breed","")
	imgurlsDF$breedname <- stri_replace_all_fixed(imgurlsDF$breedname,"what is a","")
	imgurlsDF$breedname <- stri_replace_all_fixed(imgurlsDF$breedname,"what is the","")

	#match provided breed vector with URLS
	imgurlsDF <- imgurlsDF %>% filter(!is.na(breedname))
	breedVector <- data.frame(breeds=breedVector)
	joinedMatches <- stringdist_left_join(breedVector,imgurlsDF, by=c("breeds"="breedname"),max_dist=3)
	joinedMatches$imgurls <- as.character(joinedMatches$imgurls)
	joinedMatches <- joinedMatches %>% filter(!is.na(imgurls))

	# scrape two dog breed photos
	urlInd <- sample(nrow(joinedMatches),2,rep=F)
	# for upper right corner
	dogImg.URLU <- joinedMatches$imgurls[urlInd[1]]
	# save as a raster object
	dogImgU <- rasterGrob(readJPEG(getURLContent(dogImg.URLU)))
	# label for grob
	dogImgUlab <- textGrob(paste(stri_trans_totitle(joinedMatches$breedname[urlInd[1]])))
	# for lower right corner
	dogImg.URLL <- joinedMatches$imgurls[urlInd[2]]
	# save as a raster object
	dogImgL <- rasterGrob(readJPEG(getURLContent(dogImg.URLL)))
	# label for grob
	dogImgLlab <- textGrob(paste(stri_trans_totitle(joinedMatches$breedname[urlInd[2]])))

	# graphical parameters
	# define plot layout
	lay <- rbind(c(1,1,2),
	c(1,1,3),
	c(1,1,4),
	c(1,1,5))

	# set up some attribution text
	# the fontfamily parameter is optional, erase or change to something else to match the fonts available for your system
	rightText=textGrob("images from www.dogtime.com", rot=90, gp=gpar(fontfamily = "Roboto Condensed Light"))

	# arrange the plot and the image side by side
	grid.arrange(ggplotObject, dogImgU, dogImgUlab, dogImgL, dogImgLlab, layout_matrix=lay,widths=c(2,1,1),right=rightText)

	}