erdavis1 · May 21, 2020 19:32
diff --git a/Processallbooks.R b/Processallbooks.R
 library(reticulate)
 library(cleanNLP)
 library(dplyr)
 library(stringr)
 library(tidyr)
 library(textstem)

 # ---- Session-wide settings ----
 # Keep character columns as plain strings when the CSVs below are read.
 options(stringsAsFactors = FALSE)
 # Every relative path below is resolved against this project folder.
 setwd("C:/Users/Erin/Documents/DataViz/Adjectives/")

 # ---- spaCy backend ----
 # Select the Python install for reticulate, then start cleanNLP's spaCy backend.
 use_python("C:/Users/Erin/Anaconda3")
 cnlp_init_spacy()

 # ---- Limits and accumulator ----
 lim <- 900000   # maximum number of characters handed to the annotator per call
 final <- NULL   # the processing loop appends each chunk's rows to this

 # ---- Input tables ----
 # One row per book; the processing loop reads its `Link` and `ID` columns.
 files <- read.csv("PotentialBooks.csv")
 # Not used in the visible code -- presumably consumed by the body-part
 # extraction step (TODO confirm).
 body <- read.csv("./Data/bodyparts.csv")
  

 # Annotate every book listed in `files`, one `lim`-sized slice at a time, and
 # append the rows extracted from each slice to `final`, tagged with the book ID.
 # seq_len() is used instead of 1:n so an empty `files` table runs zero
 # iterations rather than iterating over c(1, 0).
 for (i in seq_len(nrow(files))) {
   # Read the whole book and flatten it into a single string.
   # (debug alternative: readLines('test.txt') %>% paste(collapse = " "))
   mastertext <- readLines(files$Link[i]) %>% paste(collapse = " ")

   #---spacy can only process 900k characters at once, or thereabouts
   loops <- ceiling(nchar(mastertext) / lim)

   # An empty book gives loops == 0; seq_len(0) skips it, whereas 1:loops would
   # run j = 1 and j = 0 and annotate two empty strings.
   for (j in seq_len(loops)) {
     #body <- read.csv("./Data/bodyparts.csv")
     # j-th slice of at most `lim` characters.
     # (debug alternative: readLines('felurian.txt') %>% paste(collapse = " "))
     # NOTE(review): slices are cut at fixed character offsets, so a sentence
     # straddling a boundary is split across two annotation calls.
     text <- substr(mastertext, (j - 1) * lim + 1, j * lim)

     #------annotate. this will take a bit
     obj <- cnlp_annotate(text)

     #----------extract body parts
     <BODY PART EXTRACTION CODE HERE>

     #----------bind in results to final dataframe
     book_results <- bind_rows(simpleposs, hadadj) %>% bind_rows(poss) %>% unique()
     # Repeat the ID to the row count so a slice with no matches (zero rows)
     # does not fail on a length-1 replacement.
     book_results$id <- rep(files$ID[i], nrow(book_results))

     final <- bind_rows(final, book_results)
   }
 }

 <SKEW CALCULATIONS PROCEED AS NORMAL HERE>
	library(reticulate)
	library(cleanNLP)
	library(dplyr)
	library(stringr)
	library(tidyr)
	library(textstem)

	# ---- Session-wide settings ----
	# Keep character columns as plain strings when the CSVs below are read.
	options(stringsAsFactors = FALSE)
	# Every relative path below is resolved against this project folder.
	setwd("C:/Users/Erin/Documents/DataViz/Adjectives/")

	# ---- spaCy backend ----
	# Select the Python install for reticulate, then start cleanNLP's spaCy backend.
	use_python("C:/Users/Erin/Anaconda3")
	cnlp_init_spacy()

	# ---- Limits and accumulator ----
	lim <- 900000   # maximum number of characters handed to the annotator per call
	final <- NULL   # the processing loop appends each chunk's rows to this

	# ---- Input tables ----
	# One row per book; the processing loop reads its `Link` and `ID` columns.
	files <- read.csv("PotentialBooks.csv")
	# Not used in the visible code -- presumably consumed by the body-part
	# extraction step (TODO confirm).
	body <- read.csv("./Data/bodyparts.csv")


	# Annotate every book listed in `files`, one `lim`-sized slice at a time, and
	# append the rows extracted from each slice to `final`, tagged with the book ID.
	# seq_len() is used instead of 1:n so an empty `files` table runs zero
	# iterations rather than iterating over c(1, 0).
	for (i in seq_len(nrow(files))) {
	  # Read the whole book and flatten it into a single string.
	  # (debug alternative: readLines('test.txt') %>% paste(collapse = " "))
	  mastertext <- readLines(files$Link[i]) %>% paste(collapse = " ")

	  #---spacy can only process 900k characters at once, or thereabouts
	  loops <- ceiling(nchar(mastertext) / lim)

	  # An empty book gives loops == 0; seq_len(0) skips it, whereas 1:loops would
	  # run j = 1 and j = 0 and annotate two empty strings.
	  for (j in seq_len(loops)) {
	    #body <- read.csv("./Data/bodyparts.csv")
	    # j-th slice of at most `lim` characters. The multiplication operators
	    # are required here: `(j-1)lim` does not parse and `jlim` is undefined.
	    # (debug alternative: readLines('felurian.txt') %>% paste(collapse = " "))
	    # NOTE(review): slices are cut at fixed character offsets, so a sentence
	    # straddling a boundary is split across two annotation calls.
	    text <- substr(mastertext, (j - 1) * lim + 1, j * lim)

	    #------annotate. this will take a bit
	    obj <- cnlp_annotate(text)

	    #----------extract body parts
	    <BODY PART EXTRACTION CODE HERE>

	    #----------bind in results to final dataframe
	    book_results <- bind_rows(simpleposs, hadadj) %>% bind_rows(poss) %>% unique()
	    # Repeat the ID to the row count so a slice with no matches (zero rows)
	    # does not fail on a length-1 replacement.
	    book_results$id <- rep(files$ID[i], nrow(book_results))

	    final <- bind_rows(final, book_results)
	  }
	}

	<SKEW CALCULATIONS PROCEED AS NORMAL HERE>
No results found