Skip to content

Instantly share code, notes, and snippets.

View ikegami-yukino's full-sized avatar

IKEGAMI Yukino ikegami-yukino

View GitHub Profile
@ikegami-yukino
ikegami-yukino / japanese_lexical_density.py
Created July 31, 2015 05:00
Japanese Lexical Density
"""
Lexical Density
http://web.archive.org/web/20110810174351/http://www.unisanet.unisa.edu.au/Resources/la/Readability/Content%20words%20and%20lexical%20density.htm
"""
from __future__ import division
import MeCab
# Part-of-speech labels that count as content words:
# noun (名詞), verb (動詞), adjective (形容詞) and adverb (副詞).
# NOTE(review): presumably compared against the part-of-speech field of
# MeCab's output; the consuming code is not visible here -- confirm.
CONTENT_WORD_POS = ('名詞', '動詞', '形容詞', '副詞')
@ikegami-yukino
ikegami-yukino / fb_categories.json
Created August 4, 2015 07:43
Facebook page category list
{
"Airline": "Airline Industry Services",
"American Restaurant": "New American Restaurant",
"Amusement Park Ride": "Roller Coaster",
"Amusement": "Arcade",
"Amusement": "Bingo Hall",
"Amusement": "Go Karting",
"Amusement": "Laser Tag",
"Antiques & Vintage": "Antique Store",
"Antiques & Vintage": "Auction House",
{
"401(K)s": ["Finance", "Investing", "Retirement Investments", "401(K)s"],
"Accommodations": ["Travel & Tourism", "Accommodations"],
"Accounting & Auditing": ["Finance", "Accounting & Auditing"],
"Acne": ["Health", "Health Conditions & Concerns", "Skin Conditions & Skin Health", "Acne"],
"Air Travel": ["Travel & Tourism", "Air Travel"],
"Airline Tickets, Fares & Flights": ["Travel & Tourism", "Air Travel", "Airline Tickets, Fares & Flights"],
"Alternative & Natural Medicine": ["Health", "Health Care Services", "Alternative & Natural Medicine"],
"Anti-Aging": ["Beauty & Personal Care", "Anti-Aging"],
"Anti-Virus Software": ["Computers", "Software", "Internet Software & Web Goodies", "Network Security Software", "Anti-Virus Software"],
@ikegami-yukino
ikegami-yukino / simple_crawler.py
Last active August 29, 2015 14:26
意識の低い単純クローラー
# -*- coding: utf-8 -*-
import os
import re
from encodings.aliases import aliases
import nkf
import tornado
from tornado import httpclient, gen
# Shallow-clone (latest commit only) mecab-ipadic-neologd into a scratch
# directory under /tmp.
git clone --depth 1 https://github.com/neologd/mecab-ipadic-neologd.git /tmp/mecab-ipadic-neologd
# Run the installer script shipped inside the clone.
# NOTE(review): -n and -y are passed straight to the installer; presumably
# "use the newest dictionary" and "answer yes to prompts" -- confirm against
# the installer's own help output.
bash /tmp/mecab-ipadic-neologd/bin/install-mecab-ipadic-neologd -n -y
# Remove the scratch clone. The commands are not chained, so this runs
# whether or not the installer succeeded.
rm -rf /tmp/mecab-ipadic-neologd
@ikegami-yukino
ikegami-yukino / mac_word2vec_install.sh
Last active May 28, 2019 19:41
Install word2vec on Mac OS X versions later than 10.9
# Build word2vec from source under /tmp and copy the build results to
# /usr/local/bin.
#
# Everything runs inside a subshell with `set -e`:
#   * the caller's working directory is left untouched, which replaces the
#     bash-only pushd/popd pair and works the same whether this file is
#     executed or sourced;
#   * the first failing step aborts the remaining ones, so a failed `cd` or
#     `git clone` can no longer let the `rm` / `cp *` below operate on
#     whatever directory the shell happens to be in.
# The subshell's exit status is non-zero when any step failed.
(
    set -e
    cd /tmp
    git clone --depth=1 https://github.com/tmikolov/word2vec
    cd word2vec
    # Replace the malloc.h include with stdlib.h so the sources compile on
    # Mac OS X. With BSD sed (the macOS default) `-i -e` is parsed as
    # "edit in place, keep a backup with suffix -e", which leaves "*.c-e"
    # files behind; the `*.c*` glob below removes them again.
    sed -i -e 's/malloc.h/stdlib.h/g' *.c
    make
    # Delete sources, sed backups, text files and build metadata so that
    # `cp *` only copies what is left in the directory.
    rm *.c* *.txt makefile LICENSE
    cp * /usr/local/bin
)
import heapq
from collections import deque
class TopK():
def __init__(self, k=5):
self.k = k
self._initialize()
@ikegami-yukino
ikegami-yukino / sparse_eliminate_zero_raws.py
Last active December 29, 2015 05:15
scipyのsparseから0の行をカットする
import numpy as np
def eliminate_zero_raws(x):
    """Return *x* with every all-zero row removed.

    Works on 2-D NumPy arrays and on scipy.sparse matrices. The surviving
    rows keep their original relative order.

    Args:
        x: A 2-D array or sparse matrix exposing ``nonzero()``.

    Returns:
        The rows of *x* that contain at least one non-zero entry. The
        result has the same type as *x* when *x* supports row indexing.
        Sparse formats that do not (for example COO) are converted to CSR
        first, so a CSR matrix is returned for them.
    """
    # Row index of every non-zero entry, de-duplicated and sorted.
    nonzero_rows = np.unique(x.nonzero()[0])
    try:
        return x[nonzero_rows]
    except (TypeError, NotImplementedError):
        # Some sparse formats cannot be indexed; fall back to CSR for
        # those, and re-raise for anything that is not a sparse matrix.
        if not hasattr(x, 'tocsr'):
            raise
        return x.tocsr()[nonzero_rows]
@ikegami-yukino
ikegami-yukino / arabic2chinese.py
Created September 3, 2015 02:55
Convert Arabic numerals to Chinese numerals
# Kanji numeral for each non-zero ASCII digit ('0' deliberately has no entry).
CHINESE_MAP = dict(zip(
    ('1', '2', '3', '4', '5', '6', '7', '8', '9'),
    ('一', '二', '三', '四', '五', '六', '七', '八', '九'),
))
# Unit names for 10**1 .. 10**15: each myriad unit (none, 万, 億, 兆) combined
# with each small unit (none, 十, 百, 千). The leading empty string (the
# ones place) is dropped by the final slice.
CHINESE_DIGITS = tuple(
    small_unit + myriad_unit
    for myriad_unit in ('', '万', '億', '兆')
    for small_unit in ('', '十', '百', '千')
)[1:]
def arabic2chinese(arabic):
chinese = []
if len(arabic) == '0':
return '〇'
arabic = arabic.replace(',', '')
for (i, num) in enumerate(arabic[::-1]):
if num == '0':
@ikegami-yukino
ikegami-yukino / dict2sparse.py
Last active May 6, 2021 09:01
dict to scipy.sparse
import numpy as np
from scipy.sparse import csr_matrix
def dict2sparse(d, dtype=np.uint32):
    """Convert a ``{column index: value}`` dict into a one-row CSR matrix.

    Args:
        d: Mapping from non-negative integer column indices to values.
        dtype: Element type of the resulting matrix. Defaults to
            ``np.uint32``, the type this function has always produced;
            pass another dtype for negative or fractional values.

    Returns:
        A ``scipy.sparse.csr_matrix`` of shape ``(1, max(d) + 1)`` holding
        the values of *d* at their column indices. An empty *d* yields an
        empty matrix of shape ``(1, 0)``.
    """
    if not d:
        # max() of an empty mapping raises ValueError; an empty dict is
        # simply a row with no columns.
        return csr_matrix((1, 0), dtype=dtype)
    indices = list(d.keys())
    data = list(d.values())
    # Single row: its entries span data[0:len(d)].
    indptr = [0, len(d)]
    return csr_matrix((data, indices, indptr),
                      shape=(1, max(indices) + 1), dtype=dtype)