This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Path to your oh-my-zsh installation.
export ZSH="$HOME/.oh-my-zsh"

# Set name of the theme to load.
# Look in ~/.oh-my-zsh/themes/
# Optionally, if you set this to "random", it'll load a random theme each
# time that oh-my-zsh is loaded.
ZSH_THEME="mortalscumbag"

# Uncomment the following line to use case-sensitive completion.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Calculate the inverse document frequency (IDF) of terms
calc_idf <- function(document, term, log_scale = log, smooth_idf = FALSE){ | |
loadNamespace("Matrix") | |
loadNamespace("text2vec") | |
if(length(document)!=length(term)){ | |
stop("length of document and terms have to be the same") | |
} | |
doc_fact <- as.factor(document) | |
term_fact <- as.factor(term) | |
sparseMat <- Matrix::sparseMatrix(i = as.numeric(doc_fact), j = as.numeric(term_fact)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
mecab_tokenize <- function(tbl, text_col, .drop=TRUE){ | |
loadNamespace("RMeCab") | |
loadNamespace("tidyr") | |
text_cname <- as.character(substitute(text_col)) | |
text <- tbl[[text_cname]] | |
# Tokenize one piece of text with MeCab and return the tokens as a data frame.
#
# `RMeCab::RMeCabC()` output is flattened with `unlist()`, so `tokens` is a
# single vector whose element names are kept alongside the values.
#   .token - the flattened token values
#   .pos   - the names attached to those values
#            (presumably part-of-speech tags, going by the column name;
#            confirm against the RMeCab documentation)
tokenize <- function(text){
  tokens <- unlist(RMeCab::RMeCabC(text))
  data.frame(.token = tokens, .pos = names(tokens))
}
if(.drop){ |