nkt1546789 · February 6, 2018 14:56
diff --git a/bsm.py b/bsm.py
 # -*- coding: utf-8 -*-

 import MeCab
 import numpy as np

 # Module-level MeCab tagger; sent_tokenize() calls its parseToNode().
 # NOTE(review): "-Ochasen" presumably selects a ChaSen-style output format;
 # only node surfaces are read in this file - confirm the option is needed.
 m = MeCab.Tagger("-Ochasen")

 def sent_tokenize(text):
     """Split Japanese text into sentences of MeCab surface tokens.

     text: unicode string or UTF-8 encoded byte string to tokenize.

     Returns a list of sentences. Each sentence is a list of unicode
     tokens and ends at (and includes) the full stop u"。". Tokens that
     follow the last full stop are returned as a final sentence instead
     of being dropped.
     """
     if isinstance(text, unicode):
         # The tagger is fed UTF-8 bytes, so encode unicode input first.
         text = text.encode("utf8")
     node = m.parseToNode(text)
     sentences = []
     sentence = []
     while node:
         surface = unicode(node.surface, "utf8")
         # MeCab's begin/end-of-sentence nodes carry an empty surface; they
         # are not words and must not count towards a sentence's length.
         if surface:
             sentence.append(surface)
             if surface == u"。":
                 sentences.append(sentence)
                 sentence = []
         node = node.next
     if sentence:
         # Keep trailing text that is not terminated by a full stop.
         sentences.append(sentence)
     return sentences

 def get_freqdict(sentences):
     """Count how often each word occurs across all sentences.

     sentences: iterable of sentences, each an iterable of words.

     Returns a dict mapping each word to its number of occurrences.
     Counts are stored as floats.
     """
     freqdict = {}
     for sentence in sentences:
         for word in sentence:
             freqdict[word] = freqdict.get(word, 0.0) + 1
     return freqdict

 def score(sentence, freqdict):
     """Return the mean log-frequency of the words in a sentence.

     sentence: sequence of words, each present as a key in freqdict.
     freqdict: mapping word -> frequency.
     """
     log_freqs = np.log([freqdict[word] for word in sentence])
     return log_freqs.sum() / len(sentence)

 def direct_proportion(i, n):
     """Direct proportion (DP) weight: (n - i + 1) / n.

     i: occurrence index of the word (summarize() passes a 1-based count).
     n: total frequency of the word.
     """
     remaining = n - i + 1
     return float(remaining) / n

 def inverse_proportion(i, n):
     """Inverse proportion (IP) weight: 1 / i.

     i: occurrence index of the word (summarize() passes a 1-based count).
     n: total frequency of the word; unused, kept so that all feature
        functions share the same (i, n) signature.
     """
     weight = 1.0 / i
     return weight

 def geometric_sequence(i, n):
     """Geometric sequence (GS) weight: 0.5 ** (i - 1).

     i: occurrence index of the word (summarize() passes a 1-based count).
     n: total frequency of the word; unused, kept so that all feature
        functions share the same (i, n) signature.
     """
     exponent = i - 1
     return 0.5 ** exponent

 def inverse_entropy(p):
     """Return 1 - H(p), where H is the binary entropy of p (natural log).

     p: probability in [0, 1].

     The entropy formula evaluates log(0) at both end points, so p == 0
     and p == 1 are handled explicitly and map to 1.0 (zero entropy).
     """
     # `p == 1.0 or 0.0` would only ever test p == 1.0, because the bare
     # 0.0 operand is always falsy; compare against both end points.
     if p == 0.0 or p == 1.0:
         return 1.0
     entropy = -p * np.log(p) - (1 - p) * np.log(1 - p)
     return 1 - entropy

 def inverse_entropy_proportion(i, n):
     """Inverse entropy weight of the relative position i / n.

     i: occurrence index of the word (summarize() passes a 1-based count).
     n: total frequency of the word.
     """
     # Force true division: with integer arguments a plain `i / n` truncates
     # under Python 2 and collapses every position before the last to 0.
     p = float(i) / n
     return inverse_entropy(p)

 def summarize(text, limit=100, **options):
     """Build an extractive summary from the highest-scoring sentences.

     text: target text
     limit: summary length limit, counted in tokens (words) of the
         selected sentences
     options:
         m: summarization mode (default 0)
             0: basic summarization model
             1: using word position feature
         f: feature function, only read when m == 1 (default 0)
             0: direct proportion (DP)
             1: inverse proportion (IP)
             2: Geometric sequence (GS)
             3: Binary function (BF) -- not implemented
             4: Inverse entropy

     Returns the selected sentences concatenated in their original order.
     Raises ValueError for an unknown mode or for an unknown or
     unimplemented feature function.
     """
     feature_functions = {
         0: direct_proportion,
         1: inverse_proportion,
         2: geometric_sequence,
         4: inverse_entropy_proportion,
     }

     # Validate the options up front so that a bad value fails with a clear
     # message instead of an unbound local further down.
     mode = options.get("m", 0)
     if mode not in (0, 1):
         raise ValueError("unknown summarization mode m=%r" % (mode,))
     word_features = None
     if mode == 1:
         feature = options.get("f", 0)
         if feature not in feature_functions:
             raise ValueError("unsupported feature function f=%r" % (feature,))
         word_features = feature_functions[feature]

     sentences = sent_tokenize(text)
     freqdict = get_freqdict(sentences)

     if mode == 0:
         scores = [score(sentence, freqdict) for sentence in sentences]
     else:
         scores = []
         # Running occurrence count of each word across the whole text, so
         # the n-th appearance of a word is weighted by its position.
         feature_dict = {}
         for sentence in sentences:
             sent_score = 0.0
             for word in sentence:
                 feature_dict[word] = feature_dict.get(word, 0.0) + 1
                 sent_score += np.log(freqdict[word]) * word_features(feature_dict[word], freqdict[word])
             sent_score /= len(sentence)
             scores.append(sent_score)

     # Take sentences greedily from best to worst score; selection stops at
     # the first sentence that would push the summary past the limit.
     topics = []
     length = 0
     for index in sorted(range(len(scores)), key=lambda k: scores[k], reverse=True):
         length += len(sentences[index])
         if length > limit:
             break
         topics.append(index)
     topics.sort()
     return "".join("".join(sentences[topic]) for topic in topics)

 if __name__ == '__main__':
     # Demo: print the sample article, then one summary for each supported
     # mode / feature-function combination.
     # The text below comes from
     # http://www.lifehacker.jp/2014/01/140121tabroid_dionote.html .
     test_title = u"真に「使える」手書きメモアプリだと思わせてくれた『DioNote』"
     test_text = u"""Android：手書きメモアプリが使えないのは過去の話になったかも。
 「手書きメモアプリ」と聞くだけで、筆者は敬遠するところがありました。今までいくつかのアプリを試してきて、うまく文字が書けたり、正しく反映されたためしがなかったのです。大人しくキーボードから入力するメモが一番だ、と。
 ですが、今回紹介する『DioNote』は、手書き反映の機敏さといい、認識力の高さといい、かなりの実力を持っていて、久々に「いいね！」と言いたくなるアプリでした。
 加えて、画像の挿入や文字入力、メモのショートカットをホームに置けるなど、細かな機能も実装されており、あらゆる点からなかなか使える仕上がりとなっています。
 早速、トップ画面右上のプラスマークからメモを作ってみます。ノートのようなデザインです。画面下部の領域に文字を手書きで入力していきます。一文字書いてみると、反応の正確さにビックリします。
 すぐさま一文字書いたことが認識され、新たな文字、さらに新たな文字...と、そのテンポの良さも素晴らしい。ちなみに、一文字ずつだけでなく、横に連続で書いていくことも可能です。
 画面右上のメニューから「キャンパス作成」をタップすると、真っさらな自由帳のような画面になります。ここでは画像の貼り付けも自由にでき、より気ままなメモを作成できます。
 """
     separator = "===================================================================================================="
     demo_options = (
         {"m": 0},
         {"m": 1, "f": 0},
         {"m": 1, "f": 1},
         {"m": 1, "f": 2},
         {"m": 1, "f": 4},
     )

     print(test_title)
     print(separator)
     print(test_text)
     for demo_option in demo_options:
         print("")
         print(test_title)
         print(separator)
         print(summarize(test_text, **demo_option))
	# -*- coding: utf-8 -*-

	import MeCab
	import numpy as np

	# Module-level MeCab tagger; sent_tokenize() calls its parseToNode().
	# NOTE(review): "-Ochasen" presumably selects a ChaSen-style output format;
	# only node surfaces are read in this file - confirm the option is needed.
	m = MeCab.Tagger("-Ochasen")

	def sent_tokenize(text):
	    """Split Japanese text into sentences of MeCab surface tokens.

	    text: unicode string or UTF-8 encoded byte string to tokenize.

	    Returns a list of sentences. Each sentence is a list of unicode
	    tokens and ends at (and includes) the full stop u"。". Tokens that
	    follow the last full stop are returned as a final sentence instead
	    of being dropped.
	    """
	    if isinstance(text, unicode):
	        # The tagger is fed UTF-8 bytes, so encode unicode input first.
	        text = text.encode("utf8")
	    node = m.parseToNode(text)
	    sentences = []
	    sentence = []
	    while node:
	        surface = unicode(node.surface, "utf8")
	        # MeCab's begin/end-of-sentence nodes carry an empty surface; they
	        # are not words and must not count towards a sentence's length.
	        if surface:
	            sentence.append(surface)
	            if surface == u"。":
	                sentences.append(sentence)
	                sentence = []
	        node = node.next
	    if sentence:
	        # Keep trailing text that is not terminated by a full stop.
	        sentences.append(sentence)
	    return sentences

	def get_freqdict(sentences):
	    """Count how often each word occurs across all sentences.

	    sentences: iterable of sentences, each an iterable of words.

	    Returns a dict mapping each word to its number of occurrences.
	    Counts are stored as floats.
	    """
	    freqdict = {}
	    for sentence in sentences:
	        for word in sentence:
	            freqdict[word] = freqdict.get(word, 0.0) + 1
	    return freqdict

	def score(sentence, freqdict):
	    """Return the mean log-frequency of the words in a sentence.

	    sentence: sequence of words, each present as a key in freqdict.
	    freqdict: mapping word -> frequency.
	    """
	    log_freqs = np.log([freqdict[word] for word in sentence])
	    return log_freqs.sum() / len(sentence)

	def direct_proportion(i, n):
	    """Direct proportion (DP) weight: (n - i + 1) / n.

	    i: occurrence index of the word (summarize() passes a 1-based count).
	    n: total frequency of the word.
	    """
	    remaining = n - i + 1
	    return float(remaining) / n

	def inverse_proportion(i, n):
	    """Inverse proportion (IP) weight: 1 / i.

	    i: occurrence index of the word (summarize() passes a 1-based count).
	    n: total frequency of the word; unused, kept so that all feature
	       functions share the same (i, n) signature.
	    """
	    return 1.0 / i

	def geometric_sequence(i, n):
	    """Geometric sequence (GS) weight: 0.5 ** (i - 1).

	    i: occurrence index of the word (summarize() passes a 1-based count).
	    n: total frequency of the word; unused, kept so that all feature
	       functions share the same (i, n) signature.
	    """
	    return 0.5 ** (i - 1)

	def inverse_entropy(p):
	    """Return 1 - H(p), where H is the binary entropy of p (natural log).

	    p: probability in [0, 1].

	    The entropy formula evaluates log(0) at both end points, so p == 0
	    and p == 1 are handled explicitly and map to 1.0 (zero entropy).
	    """
	    # `p == 1.0 or 0.0` would only ever test p == 1.0, because the bare
	    # 0.0 operand is always falsy; compare against both end points.
	    if p == 0.0 or p == 1.0:
	        return 1.0
	    entropy = -p * np.log(p) - (1 - p) * np.log(1 - p)
	    return 1 - entropy

	def inverse_entropy_proportion(i, n):
	    """Inverse entropy weight of the relative position i / n.

	    i: occurrence index of the word (summarize() passes a 1-based count).
	    n: total frequency of the word.
	    """
	    # Force true division: with integer arguments a plain `i / n` truncates
	    # under Python 2 and collapses every position before the last to 0.
	    p = float(i) / n
	    return inverse_entropy(p)

	def summarize(text, limit=100, **options):
	    """Build an extractive summary from the highest-scoring sentences.

	    text: target text
	    limit: summary length limit, counted in tokens (words) of the
	        selected sentences
	    options:
	        m: summarization mode (default 0)
	            0: basic summarization model
	            1: using word position feature
	        f: feature function, only read when m == 1 (default 0)
	            0: direct proportion (DP)
	            1: inverse proportion (IP)
	            2: Geometric sequence (GS)
	            3: Binary function (BF) -- not implemented
	            4: Inverse entropy

	    Returns the selected sentences concatenated in their original order.
	    Raises ValueError for an unknown mode or for an unknown or
	    unimplemented feature function.
	    """
	    feature_functions = {
	        0: direct_proportion,
	        1: inverse_proportion,
	        2: geometric_sequence,
	        4: inverse_entropy_proportion,
	    }

	    # Validate the options up front so that a bad value fails with a clear
	    # message instead of an unbound local further down.
	    mode = options.get("m", 0)
	    if mode not in (0, 1):
	        raise ValueError("unknown summarization mode m=%r" % (mode,))
	    word_features = None
	    if mode == 1:
	        feature = options.get("f", 0)
	        if feature not in feature_functions:
	            raise ValueError("unsupported feature function f=%r" % (feature,))
	        word_features = feature_functions[feature]

	    sentences = sent_tokenize(text)
	    freqdict = get_freqdict(sentences)

	    if mode == 0:
	        scores = [score(sentence, freqdict) for sentence in sentences]
	    else:
	        scores = []
	        # Running occurrence count of each word across the whole text, so
	        # the n-th appearance of a word is weighted by its position.
	        feature_dict = {}
	        for sentence in sentences:
	            sent_score = 0.0
	            for word in sentence:
	                feature_dict[word] = feature_dict.get(word, 0.0) + 1
	                sent_score += np.log(freqdict[word]) * word_features(feature_dict[word], freqdict[word])
	            sent_score /= len(sentence)
	            scores.append(sent_score)

	    # Take sentences greedily from best to worst score; selection stops at
	    # the first sentence that would push the summary past the limit.
	    topics = []
	    length = 0
	    for index in sorted(range(len(scores)), key=lambda k: scores[k], reverse=True):
	        length += len(sentences[index])
	        if length > limit:
	            break
	        topics.append(index)
	    topics.sort()
	    return "".join("".join(sentences[topic]) for topic in topics)

	if __name__ == '__main__':
	    # Demo: print the sample article, then one summary for each supported
	    # mode / feature-function combination.
	    # The text below comes from
	    # http://www.lifehacker.jp/2014/01/140121tabroid_dionote.html .
	    test_title = u"真に「使える」手書きメモアプリだと思わせてくれた『DioNote』"
	    test_text = u"""Android：手書きメモアプリが使えないのは過去の話になったかも。
	「手書きメモアプリ」と聞くだけで、筆者は敬遠するところがありました。今までいくつかのアプリを試してきて、うまく文字が書けたり、正しく反映されたためしがなかったのです。大人しくキーボードから入力するメモが一番だ、と。
	ですが、今回紹介する『DioNote』は、手書き反映の機敏さといい、認識力の高さといい、かなりの実力を持っていて、久々に「いいね！」と言いたくなるアプリでした。
	加えて、画像の挿入や文字入力、メモのショートカットをホームに置けるなど、細かな機能も実装されており、あらゆる点からなかなか使える仕上がりとなっています。
	早速、トップ画面右上のプラスマークからメモを作ってみます。ノートのようなデザインです。画面下部の領域に文字を手書きで入力していきます。一文字書いてみると、反応の正確さにビックリします。
	すぐさま一文字書いたことが認識され、新たな文字、さらに新たな文字...と、そのテンポの良さも素晴らしい。ちなみに、一文字ずつだけでなく、横に連続で書いていくことも可能です。
	画面右上のメニューから「キャンパス作成」をタップすると、真っさらな自由帳のような画面になります。ここでは画像の貼り付けも自由にでき、より気ままなメモを作成できます。
	"""
	    separator = "===================================================================================================="
	    demo_options = (
	        {"m": 0},
	        {"m": 1, "f": 0},
	        {"m": 1, "f": 1},
	        {"m": 1, "f": 2},
	        {"m": 1, "f": 4},
	    )

	    print(test_title)
	    print(separator)
	    print(test_text)
	    for demo_option in demo_options:
	        print("")
	        print(test_title)
	        print(separator)
	        print(summarize(test_text, **demo_option))
No results found