Created
February 2, 2018 08:30
-
-
Save toannhu/5e4ac68acc400c25a73a5941ff3168de to your computer and use it in GitHub Desktop.
Number_to_Text
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1 | 7/2012 |
---|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
7/2012 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import csv | |
import nltk | |
def get_filepaths(directory):
    """Return the paths of every file found beneath *directory*.

    The tree is traversed with ``os.walk``. Each entry of the result is the
    visited folder joined with a file name, listed in the order ``os.walk``
    reports them. Sub-directories themselves are not included.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(directory)
        for name in names
    ]
def int_to_vn(num):
    """Spell out the integer *num* in Vietnamese words.

    Rules implemented here:

    * 0-10 come straight from a lookup table.
    * 11-19 are "mười <unit>", with 15 read as "mười lăm".
    * 20-99 are "<tens> mươi <unit>", where a trailing 1, 4 or 5 is read
      "mốt", "tư" or "lăm".
    * Hundreds, thousands ("nghìn") and millions ("triệu") recurse on the
      leading group and on the remainder. Missing lower groups are filled
      with "không nghìn" / "không trăm", and a remainder below ten is
      introduced by "linh".
    * There is no dedicated word for billions: values of 10**9 and above are
      counted in millions (for example "một nghìn triệu").
    * Negative values are read as "âm" followed by the absolute value.

    Args:
        num: the integer to convert.

    Returns:
        The Vietnamese reading as a string.
    """
    units = {0: 'không', 1: 'một', 2: 'hai', 3: 'ba', 4: 'bốn', 5: 'năm',
             6: 'sáu', 7: 'bảy', 8: 'tám', 9: 'chín', 10: 'mười'}

    if num < 0:
        # Previously this fell through to the table lookup and raised KeyError.
        return 'âm ' + int_to_vn(-num)
    if num <= 10:
        return units[num]

    if num >= 1000000:
        millions, rest = divmod(num, 1000000)
        head = int_to_vn(millions) + " triệu"
        if rest == 0:
            return head
        # "linh" only precedes a single digit; a remainder of exactly 10 must
        # take the next branch so it reads "... không trăm mười".
        if rest < 10:
            return head + " không nghìn không trăm linh " + int_to_vn(rest)
        if rest < 100:
            return head + " không nghìn không trăm " + int_to_vn(rest)
        if rest < 1000:
            return head + " không nghìn " + int_to_vn(rest)
        return head + " " + int_to_vn(rest)

    if num >= 1000:
        thousands, rest = divmod(num, 1000)
        head = int_to_vn(thousands) + " nghìn"
        if rest == 0:
            return head
        # Same "linh" rule as in the millions branch.
        if rest < 10:
            return head + " không trăm linh " + int_to_vn(rest)
        if rest < 100:
            return head + " không trăm " + int_to_vn(rest)
        return head + " " + int_to_vn(rest)

    if num >= 100:
        hundreds, rest = divmod(num, 100)
        head = int_to_vn(hundreds) + " trăm"
        if rest == 0:
            return head
        if rest < 10:
            return head + " linh " + int_to_vn(rest)
        # Covers rest == 10 as well, which reads "... trăm mười".
        return head + " " + int_to_vn(rest)

    if num >= 20:
        tens, unit = divmod(num, 10)
        head = int_to_vn(tens) + ' mươi'
        if unit == 0:
            return head
        # Units that change form after "mươi".
        irregular = {1: 'mốt', 4: 'tư', 5: 'lăm'}
        return head + ' ' + irregular.get(unit, units[unit])

    # Only 11-19 remain.
    if num == 15:
        return 'mười lăm'
    return "mười " + units[num % 10]
def _spell_token(word):
    """Return the Vietnamese spoken form of one token, or the token unchanged."""
    if word.isdigit():
        return int_to_vn(int(word))
    if word == '%':
        return 'phần trăm'

    parts = word.split('/')
    if len(parts) == 3 and all(part.isdigit() for part in parts):
        day, month, year = (int_to_vn(int(part)) for part in parts)
        # NOTE(review): the leading space is kept from the original output
        # format; confirm whether it is still wanted.
        return ' ' + day + ' tháng ' + month + ' năm ' + year
    if len(parts) == 2:
        left, right = parts
        if left.isdigit() and right.isdigit():
            # A second field above 12 cannot be a month, so read month/year.
            if int(right) > 12:
                return 'tháng ' + int_to_vn(int(left)) + ' năm ' + int_to_vn(int(right))
            return ' ' + int_to_vn(int(left)) + ' tháng ' + int_to_vn(int(right))
        if not left.isdigit() and not right.isdigit():
            # Something like "km/h" is read "km trên h".
            return left + ' trên ' + right

    # "1.000" and "1,000": both separators are treated as digit grouping, so
    # the two groups are concatenated and read as one integer.
    for separator in ('.', ','):
        groups = word.split(separator)
        if len(groups) == 2 and groups[0].isdigit() and groups[1].isdigit():
            return int_to_vn(int(groups[0] + groups[1]))

    return word


def processNumber(input, output):
    """Rewrite the text file *input* into *output* with numbers spelled out.

    Each line is tokenised with ``nltk.word_tokenize``. Integers, slash
    dates, "a/b" pairs, grouped numbers and "%" are replaced by their
    Vietnamese reading, and the tokens are joined back with single spaces.
    No space is inserted before the first token of a line, before closing
    punctuation, or right after an opening parenthesis. One output line is
    written per input line.

    Args:
        input: path of the UTF-8 text file to read.
        output: path of the UTF-8 text file to create or overwrite.
    """
    no_space_before = {',', ';', '.', ':', '?', '!', ')', '\''}

    # The input is opened first so that a missing input file does not
    # truncate an existing output file.
    with open(input, 'r', encoding='utf-8') as fin, \
            open(output, 'w', encoding='utf-8') as fout:
        for line in fin:
            tokens = nltk.word_tokenize(line)
            pieces = []
            # Spacing is decided by position; looking tokens up by value picks
            # the wrong neighbour whenever a token repeats in the line.
            for position, word in enumerate(tokens):
                spoken = _spell_token(word)
                attach = (
                    position == 0
                    or spoken in no_space_before
                    or tokens[position - 1] == '('
                )
                pieces.append(spoken if attach else ' ' + spoken)
            fout.write(''.join(pieces) + '\n')
# Script entry point: the body runs only when this file is executed directly.
if __name__=="__main__":
    # Example file conversions, left disabled:
    # processNumber('input.txt', 'output.txt')
    # processNumber('input.csv', 'output.csv')
    # Print the Vietnamese reading of a sample number.
    print(int_to_vn(1000005))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment