Created
March 1, 2017 20:09
-
-
Save jfaganUK/b6e1d647194126ff26dffbdace273723 to your computer and use it in GitHub Desktop.
Functions for connecting to and running analyses on uClassify
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Functions for connecting to uClassify and running text classification
#' Build the empty results table for a batch of texts
#'
#' @param text_vec The vector of texts that will be classified.
#' @return A data.table with one row per element of `text_vec`: column `txt`
#'   holds the text and column `text_coverage` is a numeric column of zeros.
uclassify_output_dt <- function(text_vec) {
  # Zero-filled placeholder, one entry per text.
  coverage_placeholder <- rep(0, length(text_vec))
  data.table(txt = text_vec, text_coverage = coverage_placeholder)
}
#' Add the classifier properties to the output data table
#'
#' Requests the classifier description from `url` and, for every class name it
#' lists, adds a column of that name to `o`, filled with `NA`.
#'
#' @param o The output dataset
#' @param url The URL of the uClassifier
#' @param token the API token
#' @return `o` with one additional numeric `NA` column per class name
uclassify_get_classifier_struct <- function(o, url = getOption('uclassifyURL'), token = getOption('uclassifyToken')) {
  req <- httr::GET(
    url,
    httr::add_headers(Authorization = paste0("Token ", token), 'Content-Type' = 'application/json')
  )
  # Fail loudly on an HTTP error instead of trying to parse an error payload.
  httr::stop_for_status(req)
  classifier_structure <- jsonlite::fromJSON(rawToChar(req$content))
  # NOTE(review): assumes the response parses to a table with a `className`
  # column -- confirm against the API.
  # Iterating over the names directly is a no-op when there are none
  # (1:nrow(...) would not be).
  for (class_name in classifier_structure$className) {
    # Size the column from `o` itself; typed NA keeps the column numeric.
    o[[class_name]] <- rep(NA_real_, nrow(o))
  }
  o
}
#' Run the vector of text against the uClassify classifier defined using the URL.
#'
#' Posts the texts as JSON to `<url>/classify`, stops with an error if the
#' HTTP request fails, and combines the reported text coverage with the
#' reshaped classification results.
#'
#' @param text_vec The vector of text
#' @param url The URL of the classifier (`/classify` is appended to it)
#' @param token The API token
#' @return A data table of results: the input text, the text coverage taken
#'   from the response, and the columns produced by
#'   `uclassify_reshape_result()`
uclassify_get_classify_results <- function(text_vec, url = getOption('uclassifyURL'), token = getOption('uclassifyToken')) {
  o <- uclassify_output_dt(text_vec)
  json_body <- jsonlite::toJSON(list(texts = text_vec))
  # request results from the classifier
  req <- httr::POST(
    paste0(url, '/classify'),
    body = json_body, encode = 'json',
    httr::add_headers(Authorization = paste0("Token ", token), 'Content-Type' = 'application/json')
  )
  # Surface HTTP errors (bad token, unknown classifier, ...) here rather than
  # reshaping an error payload into a misleading result.
  httr::stop_for_status(req)
  result_list <- jsonlite::fromJSON(
    rawToChar(req$content),
    simplifyVector = FALSE, simplifyDataFrame = TRUE, flatten = TRUE
  )
  # reshape the results
  o$text_coverage <- result_list$textCoverage
  o <- cbind(o, uclassify_reshape_result(result_list))
  o
}
#' Reshape the parsed classification results into a single data table
#'
#' Each element of `result_list$classification` is reshaped from long form
#' (one row per class) to one wide row with a column per class name, and the
#' rows are then stacked.
#'
#' @param result_list A result list converted from JSON. Its `classification`
#'   element is iterated over; each entry is passed to `reshape()` with a
#'   `className` column as the time variable.
#'   NOTE(review): presumably each entry is a data frame with columns
#'   `className` and `p` -- confirm against the API response.
#' @return A data.table of the combined, reshaped results, one row per entry
#'   of `result_list$classification`
uclassify_reshape_result <- function(result_list) {
  wide_rows <- lapply(result_list$classification, function(x) {
    # reshape() needs an id column; every entry describes a single text, so a
    # constant id collapses it to one wide row.
    x$id <- 0
    wide <- reshape(x, direction = 'wide', timevar = 'className', idvar = 'id')
    wide$id <- NULL
    # Strip only the leading "p." that reshape() prepends; an unanchored
    # pattern would also eat "p." inside a class name (e.g. "top.class").
    colnames(wide) <- sub('^p\\.', '', colnames(wide))
    wide
  })
  # Bind by column name so a differing class order between entries cannot
  # misalign values; classes missing from an entry become NA.
  data.table::rbindlist(wide_rows, use.names = TRUE, fill = TRUE)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example: classify a few texts with uClassify.
# (No rm(list = ls()) / gc() here: a script should not wipe the caller's
# workspace.)
library(jsonlite)
library(httr)
library(data.table)
source('uclassify_functions.R')
# this is just an example url for a recent project
# The token is taken from the UCLASSIFY_TOKEN environment variable when it is
# set, so a real key does not have to be written into this file; otherwise the
# placeholder below is used.
options(uclassifyURL = 'https://api.uclassify.com/v1/prfekt/myers-briggs-attitude',
        uclassifyToken = Sys.getenv('UCLASSIFY_TOKEN', unset = 'yourAPIKey'))
# just some text
text_vec <- c('Great meeting with CEOs of leading U.S. health insurance companies who provide great healthcare to the American people.',
              'Russia talk is FAKE NEWS put out by the Dems, and played up by the media, in order to mask the big election defeat and the illegal leaks!',
              'The race for DNC Chairman was, of course, totally "rigged." Bernie\'s guy, like Bernie himself, never had a chance. Clinton demanded Perez!')
o <- uclassify_get_classify_results(text_vec)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment