This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Kakao Hangul Analyzer III | |
__version__ = '0.3' | |
__author__ = 'Kakao Corp.' | |
__copyright__ = 'Copyright (C) 2018-, Kakao Corp. All rights reserved.' | |
__license__ = 'Apache 2.0' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
outputdebug = True | |
def debug(msg): | |
if outputdebug: | |
print(msg) | |
class Node(): | |
def __init__(self, key): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import frontmatter | |
import glob | |
import yaml | |
import string | |
from konlpy.tag import Komoran | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
def get_posts(folder='C:/Users/ratsgo/GoogleDrive/내폴더/git/blog/_posts'): | |
result = {} | |
for filepath in glob.glob(folder + "/*"): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import Counter | |
import random | |
def p_topic_given_document(topic, d, alpha=0.1): | |
return ((document_topic_counts[d][topic] + alpha) / | |
(document_lengths[d] + K * alpha)) | |
def p_word_given_topic(word, topic, beta=0.1): | |
return ((topic_word_counts[topic][word] + beta) / | |
(topic_counts[topic] + V * beta)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
# Copyright (c) 2012, Chi-En Wu | |
from math import log | |
def _normalize_prob(prob, item_set): | |
result = {} | |
if prob is None: | |
number = len(item_set) | |
for item in item_set: | |
result[item] = 1.0 / number |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(stringr) | |
# loading | |
DTM <- readRDS('dtm.rds') | |
vec <- read.csv('word2vec.txt', fileEncoding='utf-8', sep=" ", header=F, skip=1) | |
# distance matrix | |
distance <- dist(vec[,-1]) | |
distance <- as.matrix(distance) | |
colnames(distance) <- vec[,1] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import defaultdict | |
from pandas import read_table | |
import numpy as np | |
import math | |
class NaiveBayesClassifier: | |
def __init__(self, k=0.5): | |
self.k = k | |
self.word_probs = [] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""This file contains code for use with "Think Bayes", | |
by Allen B. Downey, available from greenteapress.com | |
Copyright 2012 Allen B. Downey | |
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html | |
""" | |
"""This file contains class definitions for: | |
Hist: represents a histogram (map from values to integer frequencies). |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
import re | |
import tensorflow as tf | |
import random | |
import pickle | |
from collections import defaultdict | |
import operator | |
#################################################### |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import time | |
import datetime | |
from tensorflow import flags | |
import tensorflow as tf | |
import numpy as np | |
class TextCNN(object): | |
""" | |
A CNN for text classification. |
NewerOlder