Last active
September 11, 2022 08:01
-
-
Save samirsaci/f83a7d9ca5ab922296fc2315c99f51b8 to your computer and use it in GitHub Desktop.
Productivity
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import jieba, itertools | |
def to_pinyin(sentence):
    """Return the pinyin transliteration of a Mandarin sentence.

    The sentence is segmented with ``jieba.cut``, the segments are joined
    with single spaces, and the spaced text is handed to ``pinyin``. The
    nested result is flattened and concatenated into one string.

    Args:
        sentence: Text to transliterate. Non-string values (e.g. the NaN
            that a blank spreadsheet cell may produce) are tolerated.

    Returns:
        The concatenated pinyin string, or ``""`` when ``sentence`` is not
        a string.
    """
    # NOTE(review): `pinyin` is not imported in the visible part of this
    # file; presumably it is `pypinyin.pinyin` -- confirm and add the import.
    if not isinstance(sentence, str):
        # Only text can be segmented; skip missing/non-text cells so that
        # applying this over a whole column does not abort on one bad row.
        return ""
    spaced = " ".join(jieba.cut(sentence))
    readings = pinyin(spaced)
    # Flatten the nested result lazily; join consumes the iterator directly.
    return "".join(itertools.chain.from_iterable(readings))
# Import the vocabulary workbook.
# `read_excel` does not take an `encoding` argument (an .xlsx workbook
# carries its own text encoding), and current pandas raises TypeError when
# one is passed, so it is omitted here.
words = pd.read_excel('vocabulary.xlsx')
# Add a 'Pinyin' column computed from each entry of the 'Mandarin' column
words['Pinyin'] = words['Mandarin'].apply(to_pinyin)
# Export the augmented table to a new workbook
words.to_excel('vocabulary_pinyin.xlsx')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment