bryanyang0528 · August 29, 2015 14:04
diff --git a/gistfile1.py b/gistfile1.py
 import codecs
 #處理編碼的套件
 import operator
 ##處理字典檔排序的套件
 

 # Characters that end a segment: ASCII and full-width punctuation, quotes,
 # the backslash, and CR/LF. Written as a unicode literal so the value is text
 # on both Python 2 and Python 3 (str has no .decode() on Python 3).
 cutlist = u"<>/:：;；,、＂’，.。！？｢\"\'\\\n\r《》“”!@#$%^&*()"


 def cutSentence(text_path, keywords):
     """Split a UTF-8 text file into punctuation-delimited segments.

     Each line is stripped of surrounding whitespace, every occurrence of
     each keyword is deleted from it, and the remaining characters are
     scanned one by one. A character found in the module-level ``cutlist``
     ends the current segment. Line breaks are stripped before scanning,
     so a segment may continue across lines.

     Args:
         text_path: Path of the file to read; it is decoded as UTF-8.
         keywords: Iterable of substrings deleted from every line before
             it is split. An empty keyword leaves the line unchanged.

     Returns:
         A list of segments in file order. Adjacent delimiters yield empty
         strings, and non-empty text after the last delimiter is returned
         as a final segment.
     """
     textList = []
     pending = []  # characters of the segment currently being built

     # The context manager closes the file even if reading fails midway.
     with codecs.open(text_path, "r", "utf-8") as text:
         for line in text:
             line = line.strip()

             # Remove every keyword occurrence before looking for delimiters.
             for keyword in keywords:
                 line = line.replace(keyword, "")

             for word in line:
                 if word in cutlist:
                     # Delimiter reached: close the current segment.
                     textList.append("".join(pending))
                     pending = []
                 else:
                     pending.append(word)

     # Flush text that follows the last delimiter instead of dropping it.
     if pending:
         textList.append("".join(pending))
     return textList
	import codecs
	#處理編碼的套件
	import operator
	##處理字典檔排序的套件


	# Characters that end a segment: ASCII and full-width punctuation, quotes,
	# the backslash, and CR/LF. Written as a unicode literal so the value is text
	# on both Python 2 and Python 3 (str has no .decode() on Python 3).
	cutlist = u"<>/:：;；,、＂’，.。！？｢\"\'\\\n\r《》“”!@#$%^&*()"


	def cutSentence(text_path, keywords):
	    """Split a UTF-8 text file into punctuation-delimited segments.

	    Each line is stripped of surrounding whitespace, every occurrence of
	    each keyword is deleted from it, and the remaining characters are
	    scanned one by one. A character found in the module-level ``cutlist``
	    ends the current segment. Line breaks are stripped before scanning,
	    so a segment may continue across lines.

	    Args:
	        text_path: Path of the file to read; it is decoded as UTF-8.
	        keywords: Iterable of substrings deleted from every line before
	            it is split. An empty keyword leaves the line unchanged.

	    Returns:
	        A list of segments in file order. Adjacent delimiters yield empty
	        strings, and non-empty text after the last delimiter is returned
	        as a final segment.
	    """
	    textList = []
	    pending = []  # characters of the segment currently being built

	    # The context manager closes the file even if reading fails midway.
	    with codecs.open(text_path, "r", "utf-8") as text:
	        for line in text:
	            line = line.strip()

	            # Remove every keyword occurrence before looking for delimiters.
	            for keyword in keywords:
	                line = line.replace(keyword, "")

	            for word in line:
	                if word in cutlist:
	                    # Delimiter reached: close the current segment.
	                    textList.append("".join(pending))
	                    pending = []
	                else:
	                    pending.append(word)

	    # Flush text that follows the last delimiter instead of dropping it.
	    if pending:
	        textList.append("".join(pending))
	    return textList