Last active
April 21, 2018 12:05
-
-
Save prs-watch/95075dfc233a6ac1d0dd6ad2e32c8d0b to your computer and use it in GitHub Desktop.
Translate MLB players' names into Japanese.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| MLB Name Translator | |
| メジャーリーガー名をカタカナに変換します。 | |
| (例) Noah Syndergaard -> ノア・シンダーガード | |
| """ | |
| import pandas as pd | |
| import pickle | |
class DictionaryCreator:
    """Build the English -> katakana name dictionary from Japanese Wikipedia."""

    @classmethod
    def create_dictionary(cls):
        """Scrape the per-letter player list pages (A-Z) and build the dictionary.

        Each page URL is the base URL followed by a single uppercase letter.
        Pages for which ``get_df`` returns ``None`` are skipped. The finished
        dictionary is pickled to ``dict.pickle`` (relative to the current
        working directory) and also returned.

        Returns:
            dict: English name part -> katakana name part.
        """
        base_url = 'https://ja.wikipedia.org/wiki/メジャーリーグベースボールの選手一覧'
        dictionary = {}
        print('--- 辞書作成開始 ---')
        for code in range(ord('A'), ord('Z') + 1):
            alph = chr(code)
            df = cls.get_df(base_url + alph)
            if df is not None:
                cls.register_name(df, dictionary)
            print(alph + '...')
        print('--- 辞書作成完了 ---')
        # Serialize the dictionary so later runs can reuse it.
        with open('dict.pickle', 'wb') as handler:
            pickle.dump(dictionary, handler, protocol=pickle.HIGHEST_PROTOCOL)
        return dictionary

    @staticmethod
    def get_df(url):
        """Fetch the raw name table from one Wikipedia page.

        Args:
            url: Page to read; passed straight to ``pandas.read_html``.

        Returns:
            A DataFrame with ``jp_name`` / ``en_name`` columns containing only
            rows whose ``jp_name`` is present, or ``None`` when the page could
            not be read or has no table with the expected name columns.
        """
        try:
            data = pd.read_html(url, match='選手名', header=0)[0]
        except Exception:
            # Best effort: a missing page, a network error or a page without
            # a matching table all mean "nothing to register for this letter".
            return None
        # Normalize the column names used by register_name.
        data = data.rename(columns={'選手名(日本語)': 'jp_name', '選手名(英語)': 'en_name'})
        if 'jp_name' not in data.columns or 'en_name' not in data.columns:
            return None
        # drop() returns a new frame, so the result must be kept; tolerate
        # pages that lack some of these auxiliary columns.
        data = data.drop(columns=['初年', '最終年', '特記事項'], errors='ignore')
        return data[data['jp_name'].notnull()]

    @staticmethod
    def register_name(df, dictionary):
        """Register the name parts of every row of ``df`` into ``dictionary``.

        The English name is split on spaces and the Japanese name on '・'.
        The first element of each becomes one key/value pair; the remaining
        elements, re-joined with a single space, become a second pair.
        Key = English name part, value = katakana name part.

        Rows whose names are not strings (e.g. NaN) are skipped, and a pair is
        only stored when both sides are non-empty: an empty key would make
        ``str.replace`` insert the value between every character at
        translation time.

        Args:
            df: DataFrame with ``en_name`` and ``jp_name`` columns.
            dictionary: dict updated in place.
        """
        for _, row in df.iterrows():
            en_name = row['en_name']
            jp_name = row['jp_name']
            if not isinstance(en_name, str) or not isinstance(jp_name, str):
                continue
            en_first, *en_rest = en_name.split(' ')
            jp_first, *jp_rest = jp_name.split('・')
            pairs = (
                (en_first, jp_first),
                (' '.join(en_rest), ' '.join(jp_rest)),
            )
            for key, value in pairs:
                if key and value:
                    dictionary[key] = value
class Translator:
    """Translate English player names to katakana using a name dictionary."""

    @classmethod
    def translate(cls, names, dictionary):
        """Translate each name, print ``<name> -> <translation>`` and collect it.

        Spaces remaining in a translated name are replaced with '・'.

        Args:
            names: Iterable of English names.
            dictionary: Mapping of English name part -> katakana name part.

        Returns:
            list: Translated names, in input order.
        """
        # Sort the keys once instead of once per name.
        keys = cls._ordered_keys(dictionary)
        results = []
        for name in names:
            name_jp = cls._translate(name, dictionary, keys)
            name_jp = name_jp.replace(' ', '・')
            print(name + ' -> ' + name_jp)
            results.append(name_jp)
        return results

    @staticmethod
    def _ordered_keys(dictionary):
        """Return the non-empty keys of ``dictionary``, longest first."""
        # An empty key must never reach str.replace: replacing '' inserts the
        # value between every character of the name.
        return sorted((key for key in dictionary if key), key=len, reverse=True)

    @staticmethod
    def _translate(name, dictionary, keys=None):
        """Replace every dictionary key found in ``name`` with its value.

        Keys are applied longest first so that a long key is substituted
        before any shorter key it contains.

        NOTE: matching is plain substring replacement, so a short key can
        also match inside a longer word that is not itself in the dictionary.

        Args:
            name: English name to translate.
            dictionary: Mapping of English name part -> katakana name part.
            keys: Optional pre-sorted key list (longest first); computed from
                ``dictionary`` when omitted.

        Returns:
            str: ``name`` with all matching keys replaced.
        """
        if keys is None:
            keys = Translator._ordered_keys(dictionary)
        for key in keys:
            name = name.replace(key, dictionary[key])
        return name
class UI:
    """Entry point: load or build the name dictionary, then translate."""

    # Dictionary kept at class level so it is shared between execute() calls.
    dictionary = {}

    @classmethod
    def execute(cls, names, command='pass'):
        """Translate ``names``, building the dictionary first when needed.

        The dictionary is read from ``dict.pickle`` when that file can be
        loaded. It is regenerated when it is empty or when ``command`` is
        ``'update'``.

        Args:
            names: Iterable of English names to translate.
            command: Pass ``'update'`` to force the dictionary to be rebuilt;
                any other value reuses the cached dictionary when available.
        """
        if command != 'update':
            # The cache would be overwritten on 'update', so only read it here.
            # NOTE(security): pickle.load can execute arbitrary code; only
            # use a dict.pickle file that this program wrote itself.
            try:
                with open('dict.pickle', 'rb') as handler:
                    loaded = pickle.load(handler)
            except (OSError, EOFError, pickle.UnpicklingError,
                    AttributeError, ImportError, IndexError):
                # Missing or unreadable cache: keep the current dictionary and
                # let the check below decide whether to rebuild.
                pass
            else:
                if isinstance(loaded, dict):
                    cls.dictionary = loaded
        # Rebuild when there is nothing usable or a rebuild was requested.
        if not cls.dictionary or command == 'update':
            cls.dictionary = DictionaryCreator.create_dictionary()
        Translator.translate(names, cls.dictionary)
if __name__ == "__main__":
    # Sample run. The dictionary is generated only when it is not already
    # available; otherwise the existing one is reused.
    names = ["Noah Syndergaard"]
    UI.execute(names)
    # To force the dictionary to be regenerated, call instead:
    #     UI.execute(names, 'update')
    # Expected output: "Noah Syndergaard -> ノア・シンダーガード"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment