Skip to content

Instantly share code, notes, and snippets.

View yosuke-yasuda's full-sized avatar

Yosuke Yasuda yosuke-yasuda

View GitHub Profile
# Path to your oh-my-zsh installation.
export ZSH=$HOME/.oh-my-zsh
# Set name of the theme to load.
# Look in ~/.oh-my-zsh/themes/
# Optionally, if you set this to "random", it'll load a random theme each
# time that oh-my-zsh is loaded.
ZSH_THEME="mortalscumbag"
# Uncomment the following line to use case-sensitive completion.
#' Calculate the inverse document frequency (IDF) of terms
calc_idf <- function(document, term, log_scale = log, smooth_idf = FALSE){
loadNamespace("Matrix")
loadNamespace("text2vec")
if(length(document)!=length(term)){
stop("length of document and terms have to be the same")
}
doc_fact <- as.factor(document)
term_fact <- as.factor(term)
sparseMat <- Matrix::sparseMatrix(i = as.numeric(doc_fact), j = as.numeric(term_fact))
mecab_tokenize <- function(tbl, text_col, .drop=TRUE){
loadNamespace("RMeCab")
loadNamespace("tidyr")
text_cname <- as.character(substitute(text_col))
text <- tbl[[text_cname]]
tokenize <- function(text){
  # Run MeCab over the text and flatten its output into a single named
  # character vector. The element names are kept next to the tokens in the
  # .pos column (per the RMeCab docs these names are part-of-speech tags).
  morphemes <- unlist(RMeCab::RMeCabC(text))
  tags <- names(morphemes)
  data.frame(.token = morphemes, .pos = tags)
}
if(.drop){