flovv · September 25, 2020 05:10
diff --git a/Content_Evaluation.r b/Content_Evaluation.r
 require(httr)
 require(stringr)
 require(rvest)



 # Read the pagination label of a listing page.
 #
 # Args:
 #   url: address of the page to download and parse.
 #
 # Returns:
 #   Character vector with one element per node matching ".pages": the text
 #   that follows the first "of " in that node (presumably the total page
 #   count of a "Page 1 of N" label -- confirm against the live markup).
 #   The element is "" when the node text contains no "of ".
 getNumberOfPages <- function(url){
   page <- read_html(url)
   pager_nodes <- html_nodes(page, ".pages")
   pager_text <- as.character(html_text(pager_nodes))

   # Column 1 holds the text before "of ", column 2 everything after it.
   pieces <- str_split_fixed(pager_text, pattern="of ", n=2)
   pieces[, 2]
 }



 ###################

 # Collect the link targets found inside paragraphs of one page.
 #
 # Args:
 #   url: address of the page to download and parse.
 #
 # Returns:
 #   Character vector with the href attribute of every node matching the CSS
 #   selector "p a" (NA for anchors that carry no href).
 getLinksForPage <- function(url){
   page <- read_html(url)
   anchors <- html_nodes(page, "p a")
   hrefs <- html_attr(anchors, "href")
   as.character(hrefs)
 }


 # Gather the links of every listing page from 0 up to numPages.
 #
 # Args:
 #   baseURL:  prefix of the listing pages; the page index and a trailing "/"
 #             are appended to it.
 #   numPages: index of the last page to visit (page 0 is visited as well).
 #
 # Returns:
 #   The results of getLinksForPage() for all pages, concatenated in page
 #   order.
 getAllLinks <- function(baseURL, numPages){
   # Fetch page by page, then concatenate once; growing the result with c()
   # inside a loop re-copies the whole vector on every iteration.
   per_page <- lapply(0:numPages, function(i) {
     getLinksForPage(paste0(baseURL, i, "/"))
   })

   unlist(per_page, use.names = FALSE)
 }




 ########### get share counts and page metadata for one post
 # Collect social share counts and page metadata for a single post.
 #
 # Args:
 #   url2: address of the post. It is appended verbatim to the Facebook Graph
 #         and LinkedIn count endpoints and is also downloaded itself.
 #
 # Returns:
 #   data.frame with the columns comments, shares, linkedIn1, linkedIn2, url,
 #   orgLink and pubDate. A value that could not be obtained is reported as
 #   NA, so every call yields the same set of columns.
 getEvaluation <- function(url2){
   # Substitute a typed NA for a missing value. data.frame() silently drops
   # NULL arguments and fails on zero-length ones, either of which breaks
   # stacking the per-post results later on. Non-empty values pass through
   # untouched.
   or_na <- function(x, na) {
     if (length(x) == 0L) na else x
   }
   # `$` raises an error on atomic values, so a response body that did not
   # come back as a list is treated as having no fields at all.
   as_fields <- function(x) {
     if (is.list(x)) x else list()
   }

   ret <- GET(paste0("http://graph.facebook.com/?id=",url2))
   FB <- content(ret)
   fb_share <- as_fields(as_fields(FB)$share)

   ret2 <- GET(paste0("http://www.linkedin.com/countserv/count/share?url=",url2,"&format=json"))
   linkedIn <- as_fields(content(ret2))

   ## read the original link to website .. so matching with GA data is painless.
   rb <- read_html(url2)

   orgLink <- rb %>%
     html_nodes(".social4i+ div") %>%
     as.character()

   pubDate <- rb %>%
     html_nodes(".date") %>%
     html_text() %>%
     as.character()

   # Keep the text between the first href=" and the following "> of each
   # serialized node.
   orgLink <- str_split_fixed(str_split_fixed(orgLink, 'href=\"', 2)[,2], "\">", 2)[,1]

   data.frame(
     comments = or_na(fb_share$comment_count, NA_integer_),
     shares = or_na(fb_share$share_count, NA_integer_),
     linkedIn1 = or_na(linkedIn$count, NA_integer_),
     linkedIn2 = or_na(linkedIn$fCntPlusOne, NA_integer_),
     url = url2,
     orgLink = or_na(orgLink, NA_character_),
     pubDate = or_na(pubDate, NA_character_)
   )
 }


 #############
 # Driver: scrape all posts of one r-bloggers author, attach share counts,
 # join them with Google Analytics page views and store the result.
 baseURL <- "https://www.r-bloggers.com/author/florian-teschner/page/"
 url <- "https://www.r-bloggers.com/author/florian-teschner/page/0/"

 numPages <- as.numeric(getNumberOfPages(url))
 links <- getAllLinks(baseURL, numPages)

 if (length(links) == 0L) {
   stop("no post links found under ", baseURL, call. = FALSE)
 }

 # Evaluate every link and stack the rows in one step. This needs neither a
 # stale `dff` to be removed first nor an rbind() per iteration.
 dff <- do.call(rbind, lapply(links, getEvaluation))

 library(googleAnalyticsR)

 googleAnalyticsR::ga_auth()
 account_list <- google_analytics_account_list()
 ga_id <- xxx # your GA_id here

 metrics <- googleAnalyticsR::google_analytics_meta()
 ###########

 ga_df <- google_analytics_4(ga_id,
                           date_range = c("2015-12-01","2017-11-14"),
                           metrics = c("ga:pageviews"),
                           dimensions = c("ga:landingPagePath"),
                           anti_sample = TRUE)


 ########################
 library(lubridate)
 # corrplot is not used in this section; kept as in the original script.
 require(corrplot)

 # Strip the r-bloggers host and every slash from the post address.
 dff$url <- str_replace_all(dff$url, "https://www.r-bloggers.com/", "")
 dff$url <- str_replace_all(dff$url, "/", "")

 # Strip the known blog hosts so orgLink becomes a site-relative path. The
 # longest prefix has to be removed first.
 dff$orgLink <- str_replace_all(dff$orgLink, "https://flovv.github.io/www.nypon.de/", "")
 dff$orgLink <- str_replace_all(dff$orgLink, "https://flovv.github.io/", "")
 dff$orgLink <- str_replace_all(dff$orgLink, "https://flovv.github.com/", "")

 # The leading slash lines orgLink up with the landingPagePath join key.
 dff$orgLink <- paste0("/", dff$orgLink)
 dff$pubDate <- str_replace_all(dff$pubDate, ",", "")


 dff$Date <- mdy(dff$pubDate)

 # Name both join keys; the original passed the right-hand key positionally
 # as `by`, which only reached by.y through its default.
 dff <- merge(dff, ga_df, by.x = "orgLink", by.y = "landingPagePath")
 saveRDS(dff, "contentEvaluation.rds")
	require(httr)
	require(stringr)
	require(rvest)



	# Read the pagination label of a listing page.
	#
	# Args:
	#   url: address of the page to download and parse.
	#
	# Returns:
	#   Character vector with one element per node matching ".pages": the text
	#   that follows the first "of " in that node (presumably the total page
	#   count of a "Page 1 of N" label -- confirm against the live markup).
	#   The element is "" when the node text contains no "of ".
	getNumberOfPages <- function(url){
	  page <- read_html(url)
	  pager_nodes <- html_nodes(page, ".pages")
	  pager_text <- as.character(html_text(pager_nodes))

	  # Column 1 holds the text before "of ", column 2 everything after it.
	  pieces <- str_split_fixed(pager_text, pattern="of ", n=2)
	  pieces[, 2]
	}



	###################

	# Collect the link targets found inside paragraphs of one page.
	#
	# Args:
	#   url: address of the page to download and parse.
	#
	# Returns:
	#   Character vector with the href attribute of every node matching the CSS
	#   selector "p a" (NA for anchors that carry no href).
	getLinksForPage <- function(url){
	  page <- read_html(url)
	  anchors <- html_nodes(page, "p a")
	  hrefs <- html_attr(anchors, "href")
	  as.character(hrefs)
	}


	# Gather the links of every listing page from 0 up to numPages.
	#
	# Args:
	#   baseURL:  prefix of the listing pages; the page index and a trailing "/"
	#             are appended to it.
	#   numPages: index of the last page to visit (page 0 is visited as well).
	#
	# Returns:
	#   The results of getLinksForPage() for all pages, concatenated in page
	#   order.
	getAllLinks <- function(baseURL, numPages){
	  # Fetch page by page, then concatenate once; growing the result with c()
	  # inside a loop re-copies the whole vector on every iteration.
	  per_page <- lapply(0:numPages, function(i) {
	    getLinksForPage(paste0(baseURL, i, "/"))
	  })

	  unlist(per_page, use.names = FALSE)
	}




	########### get share counts and page metadata for one post
	# Collect social share counts and page metadata for a single post.
	#
	# Args:
	#   url2: address of the post. It is appended verbatim to the Facebook Graph
	#         and LinkedIn count endpoints and is also downloaded itself.
	#
	# Returns:
	#   data.frame with the columns comments, shares, linkedIn1, linkedIn2, url,
	#   orgLink and pubDate. A value that could not be obtained is reported as
	#   NA, so every call yields the same set of columns.
	getEvaluation <- function(url2){
	  # Substitute a typed NA for a missing value. data.frame() silently drops
	  # NULL arguments and fails on zero-length ones, either of which breaks
	  # stacking the per-post results later on. Non-empty values pass through
	  # untouched.
	  or_na <- function(x, na) {
	    if (length(x) == 0L) na else x
	  }
	  # `$` raises an error on atomic values, so a response body that did not
	  # come back as a list is treated as having no fields at all.
	  as_fields <- function(x) {
	    if (is.list(x)) x else list()
	  }

	  ret <- GET(paste0("http://graph.facebook.com/?id=",url2))
	  FB <- content(ret)
	  fb_share <- as_fields(as_fields(FB)$share)

	  ret2 <- GET(paste0("http://www.linkedin.com/countserv/count/share?url=",url2,"&format=json"))
	  linkedIn <- as_fields(content(ret2))

	  ## read the original link to website .. so matching with GA data is painless.
	  rb <- read_html(url2)

	  orgLink <- rb %>%
	    html_nodes(".social4i+ div") %>%
	    as.character()

	  pubDate <- rb %>%
	    html_nodes(".date") %>%
	    html_text() %>%
	    as.character()

	  # Keep the text between the first href=" and the following "> of each
	  # serialized node.
	  orgLink <- str_split_fixed(str_split_fixed(orgLink, 'href=\"', 2)[,2], "\">", 2)[,1]

	  data.frame(
	    comments = or_na(fb_share$comment_count, NA_integer_),
	    shares = or_na(fb_share$share_count, NA_integer_),
	    linkedIn1 = or_na(linkedIn$count, NA_integer_),
	    linkedIn2 = or_na(linkedIn$fCntPlusOne, NA_integer_),
	    url = url2,
	    orgLink = or_na(orgLink, NA_character_),
	    pubDate = or_na(pubDate, NA_character_)
	  )
	}


	#############
	# Driver: scrape all posts of one r-bloggers author, attach share counts,
	# join them with Google Analytics page views and store the result.
	baseURL <- "https://www.r-bloggers.com/author/florian-teschner/page/"
	url <- "https://www.r-bloggers.com/author/florian-teschner/page/0/"

	numPages <- as.numeric(getNumberOfPages(url))
	links <- getAllLinks(baseURL, numPages)

	if (length(links) == 0L) {
	  stop("no post links found under ", baseURL, call. = FALSE)
	}

	# Evaluate every link and stack the rows in one step. This needs neither a
	# stale `dff` to be removed first nor an rbind() per iteration.
	dff <- do.call(rbind, lapply(links, getEvaluation))

	library(googleAnalyticsR)

	googleAnalyticsR::ga_auth()
	account_list <- google_analytics_account_list()
	ga_id <- xxx # your GA_id here

	metrics <- googleAnalyticsR::google_analytics_meta()
	###########

	ga_df <- google_analytics_4(ga_id,
	                          date_range = c("2015-12-01","2017-11-14"),
	                          metrics = c("ga:pageviews"),
	                          dimensions = c("ga:landingPagePath"),
	                          anti_sample = TRUE)


	########################
	library(lubridate)
	# corrplot is not used in this section; kept as in the original script.
	require(corrplot)

	# Strip the r-bloggers host and every slash from the post address.
	dff$url <- str_replace_all(dff$url, "https://www.r-bloggers.com/", "")
	dff$url <- str_replace_all(dff$url, "/", "")

	# Strip the known blog hosts so orgLink becomes a site-relative path. The
	# longest prefix has to be removed first.
	dff$orgLink <- str_replace_all(dff$orgLink, "https://flovv.github.io/www.nypon.de/", "")
	dff$orgLink <- str_replace_all(dff$orgLink, "https://flovv.github.io/", "")
	dff$orgLink <- str_replace_all(dff$orgLink, "https://flovv.github.com/", "")

	# The leading slash lines orgLink up with the landingPagePath join key.
	dff$orgLink <- paste0("/", dff$orgLink)
	dff$pubDate <- str_replace_all(dff$pubDate, ",", "")


	dff$Date <- mdy(dff$pubDate)

	# Name both join keys; the original passed the right-hand key positionally
	# as `by`, which only reached by.y through its default.
	dff <- merge(dff, ga_df, by.x = "orgLink", by.y = "landingPagePath")
	saveRDS(dff, "contentEvaluation.rds")