Created
July 23, 2020 14:38
-
-
Save inspirit941/9f758f7f748a735e5bcd31a6521f7eab to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
 "nbformat": 4,
 "nbformat_minor": 0,
 "metadata": {"colab":{"name":"get_formant.ipynb의 사본","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},
 "cells": [
  {
   "cell_type": "code",
   "metadata": {"id":"egYudDwg6mgO","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":139},"executionInfo":{"status":"ok","timestamp":1593138062229,"user_tz":-540,"elapsed":8236,"user":{"displayName":"성균관대이동건","photoUrl":"","userId":"01325739424861297145"}},"outputId":"979b8378-be2e-4ddc-ffc5-119cebbb8a2d"},
   "source": ["!pip install praat-parselmouth"],
   "execution_count": null,
   "outputs": [{"output_type":"stream","text":["Collecting praat-parselmouth\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/09/7b/9fa1172a63b6277603d27bb5613559b5a8888f58e68c1698017b87b0061d/praat_parselmouth-0.3.3-cp36-cp36m-manylinux1_x86_64.whl (9.0MB)\n","\u001b[K |████████████████████████████████| 9.0MB 2.7MB/s \n","\u001b[?25hRequirement already satisfied: numpy>=1.7.0 in /usr/local/lib/python3.6/dist-packages (from praat-parselmouth) (1.18.5)\n","Installing collected packages: praat-parselmouth\n","Successfully installed praat-parselmouth-0.3.3\n"],"name":"stdout"}]
  },
  {
   "cell_type": "code",
   "metadata": {"id":"7rKmsBH_6Wkx","colab_type":"code","colab":{}},
   "source": [
    "import glob\n",
    "import json\n",
    "import os\n",
    "\n",
    "import pandas as pd\n",
    "import parselmouth"
   ],
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "metadata": {"id":"8PQqISIH_KAC","colab_type":"code","colab":{}},
   "source": [
    "path = os.getcwd()+'/sound/'\n",
    "\n",
    "# Audio files to analyse. The '*.wav' pattern is an assumption based on the\n",
    "# file names in the saved output; adjust it if the recordings use another extension.\n",
    "# Sorted so that the processing order is the same on every run.\n",
    "lists = sorted(glob.glob(path + '*.wav'))"
   ],
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "metadata": {"id":"VWg5DAHb_eTh","colab_type":"code","colab":{}},
   "source": [
    "failed_list = []\n",
    "# Column order of the result table.\n",
    "columns = [\"times\", \"F0(pitch)\", \"F1\", \"F2\", \"F3\", \"F4\", \"F5\", \"filename\"]\n",
    "formants_value = ['F1','F2',\"F3\",\"F4\",\"F5\"]\n",
    "\n",
    "# Per-file frames are collected in a list and concatenated on demand.\n",
    "# Growing a DataFrame inside the loop is quadratic, and DataFrame.append\n",
    "# was removed in pandas 2.0.\n",
    "frames = []\n",
    "data = pd.DataFrame(columns=columns)\n",
    "\n",
    "for iters, soundpath in enumerate(lists, 1):\n",
    "    filename = os.path.basename(soundpath)\n",
    "    try:\n",
    "        # An audio file has to be loaded as a parselmouth Sound object before it can be analysed.\n",
    "        sound = parselmouth.Sound(soundpath)\n",
    "\n",
    "        # Formants are extracted with Sound.to_formant_burg.\n",
    "        # time_step sets the time interval used for formant extraction.\n",
    "        formant = sound.to_formant_burg(time_step = 0.1)\n",
    "\n",
    "        # Pitch is extracted with Sound.to_pitch.\n",
    "        # Other extraction methods (to_pitch_ac, to_pitch_cc, ...) are available; see the API reference in the docs.\n",
    "        pitch = sound.to_pitch()\n",
    "\n",
    "        # ts() returns the times at which the formants were extracted.\n",
    "        df = pd.DataFrame({\"times\":formant.ts()})\n",
    "\n",
    "        # F0 is read from the Pitch object, not from the Formant object.\n",
    "        df['F0(pitch)'] = df['times'].map(lambda x: pitch.get_value_at_time(time = x))\n",
    "\n",
    "        for idx, col in enumerate(formants_value, 1):\n",
    "            # Value of F1 ~ F5 at each time.\n",
    "            # formant_number takes the integer index of the formant (1 for F1 ... 5 for F5), not the column name.\n",
    "            df[col] = df['times'].map(lambda x: formant.get_value_at_time(formant_number = idx, time = x))\n",
    "\n",
    "        # File name\n",
    "        df['filename'] = filename\n",
    "\n",
    "        frames.append(df[columns])\n",
    "        if iters % 10 == 0:\n",
    "            print(\"success: \", filename)\n",
    "        if iters % 100 == 0:\n",
    "            # Checkpoint: write everything collected so far to disk.\n",
    "            data = pd.concat(frames)\n",
    "            print(data)\n",
    "            data.to_csv(str(iters) + \".csv\")\n",
    "\n",
    "    # Some audio files cannot be read by the Praat program; those cannot be read from Python either.\n",
    "    # Record the file as failed and continue with the next one.\n",
    "    except Exception as e:\n",
    "        print(e)\n",
    "        failed_list.append(filename)\n",
    "        print(\"fail: \", filename)\n",
    "\n",
    "# pd.concat raises on an empty list, so keep the empty table when nothing was processed.\n",
    "if frames:\n",
    "    data = pd.concat(frames)\n",
    "data.to_csv('finish.csv')"
   ],
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "metadata": {"id":"8StNqAOiwsqG","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":419},"executionInfo":{"status":"ok","timestamp":1593094350615,"user_tz":-540,"elapsed":772,"user":{"displayName":"성균관대이동건","photoUrl":"","userId":"01325739424861297145"}},"outputId":"df775754-914c-479d-ff36-e938c44c5d09"},
   "source": ["data"],
   "execution_count": null,
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       " .dataframe tbody tr th:only-of-type {\n",
       " vertical-align: middle;\n",
       " }\n",
       "\n",
       " .dataframe tbody tr th {\n",
       " vertical-align: top;\n",
       " }\n",
       "\n",
       " .dataframe thead th {\n",
       " text-align: right;\n",
       " }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       " <thead>\n",
       " <tr style=\"text-align: right;\">\n",
       " <th></th>\n",
       " <th>times</th>\n",
       " <th>F0(pitch)</th>\n",
       " <th>F1</th>\n",
       " <th>F2</th>\n",
       " <th>F3</th>\n",
       " <th>F4</th>\n",
       " <th>F5</th>\n",
       " <th>filename</th>\n",
       " </tr>\n",
       " </thead>\n",
       " <tbody>\n",
       " <tr>\n",
       " <th>0</th>\n",
       " <td>0.060</td>\n",
       " <td>NaN</td>\n",
       " <td>668.956919</td>\n",
       " <td>1573.228732</td>\n",
       " <td>2716.867429</td>\n",
       " <td>3584.648268</td>\n",
       " <td>NaN</td>\n",
       " <td>21.wav</td>\n",
       " </tr>\n",
       " <tr>\n",
       " <th>1</th>\n",
       " <td>0.160</td>\n",
       " <td>235.742471</td>\n",
       " <td>481.937873</td>\n",
       " <td>2317.762229</td>\n",
       " <td>3289.896208</td>\n",
       " <td>3725.789058</td>\n",
       " <td>NaN</td>\n",
       " <td>21.wav</td>\n",
       " </tr>\n",
       " <tr>\n",
       " <th>2</th>\n",
       " <td>0.260</td>\n",
       " <td>200.052051</td>\n",
       " <td>509.083329</td>\n",
       " <td>1341.888272</td>\n",
       " <td>1567.194317</td>\n",
       " <td>3386.816847</td>\n",
       " <td>3905.968303</td>\n",
       " <td>21.wav</td>\n",
       " </tr>\n",
       " <tr>\n",
       " <th>3</th>\n",
       " <td>0.360</td>\n",
       " <td>191.497127</td>\n",
       " <td>721.980693</td>\n",
       " <td>1718.222394</td>\n",
       " <td>3060.782565</td>\n",
       " <td>3561.439324</td>\n",
       " <td>NaN</td>\n",
       " <td>21.wav</td>\n",
       " </tr>\n",
       " <tr>\n",
       " <th>4</th>\n",
       " <td>0.460</td>\n",
       " <td>192.958886</td>\n",
       " <td>413.574857</td>\n",
       " <td>1452.808235</td>\n",
       " <td>2201.513314</td>\n",
       " <td>3574.820304</td>\n",
       " <td>4006.706467</td>\n",
       " <td>21.wav</td>\n",
       " </tr>\n",
       " <tr>\n",
       " <th>...</th>\n",
       " <td>...</td>\n",
       " <td>...</td>\n",
       " <td>...</td>\n",
       " <td>...</td>\n",
       " <td>...</td>\n",
       " <td>...</td>\n",
       " <td>...</td>\n",
       " <td>...</td>\n",
       " </tr>\n",
       " <tr>\n",
       " <th>1297</th>\n",
       " <td>129.735</td>\n",
       " <td>NaN</td>\n",
       " <td>724.083957</td>\n",
       " <td>2025.094259</td>\n",
       " <td>2964.735146</td>\n",
       " <td>5449.009564</td>\n",
       " <td>NaN</td>\n",
       " <td>22.wav</td>\n",
       " </tr>\n",
       " <tr>\n",
       " <th>1298</th>\n",
       " <td>129.835</td>\n",
       " <td>NaN</td>\n",
       " <td>1499.099465</td>\n",
       " <td>2490.380936</td>\n",
       " <td>5437.141035</td>\n",
       " <td>NaN</td>\n",
       " <td>NaN</td>\n",
       " <td>22.wav</td>\n",
       " </tr>\n",
       " <tr>\n",
       " <th>1299</th>\n",
       " <td>129.935</td>\n",
       " <td>NaN</td>\n",
       " <td>1499.100308</td>\n",
       " <td>2490.410592</td>\n",
       " <td>5437.140847</td>\n",
       " <td>NaN</td>\n",
       " <td>NaN</td>\n",
       " <td>22.wav</td>\n",
       " </tr>\n",
       " <tr>\n",
       " <th>1300</th>\n",
       " <td>130.035</td>\n",
       " <td>NaN</td>\n",
       " <td>1499.100445</td>\n",
       " <td>2490.441877</td>\n",
       " <td>5437.140680</td>\n",
       " <td>NaN</td>\n",
       " <td>NaN</td>\n",
       " <td>22.wav</td>\n",
       " </tr>\n",
       " <tr>\n",
       " <th>1301</th>\n",
       " <td>130.135</td>\n",
       " <td>NaN</td>\n",
       " <td>1499.101241</td>\n",
       " <td>2490.484445</td>\n",
       " <td>5437.140548</td>\n",
       " <td>NaN</td>\n",
       " <td>NaN</td>\n",
       " <td>22.wav</td>\n",
       " </tr>\n",
       " </tbody>\n",
       "</table>\n",
       "<p>4869 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       " times F0(pitch) F1 ... F4 F5 filename\n",
       "0 0.060 NaN 668.956919 ... 3584.648268 NaN 21.wav\n",
       "1 0.160 235.742471 481.937873 ... 3725.789058 NaN 21.wav\n",
       "2 0.260 200.052051 509.083329 ... 3386.816847 3905.968303 21.wav\n",
       "3 0.360 191.497127 721.980693 ... 3561.439324 NaN 21.wav\n",
       "4 0.460 192.958886 413.574857 ... 3574.820304 4006.706467 21.wav\n",
       "... ... ... ... ... ... ... ...\n",
       "1297 129.735 NaN 724.083957 ... 5449.009564 NaN 22.wav\n",
       "1298 129.835 NaN 1499.099465 ... NaN NaN 22.wav\n",
       "1299 129.935 NaN 1499.100308 ... NaN NaN 22.wav\n",
       "1300 130.035 NaN 1499.100445 ... NaN NaN 22.wav\n",
       "1301 130.135 NaN 1499.101241 ... NaN NaN 22.wav\n",
       "\n",
       "[4869 rows x 8 columns]"
      ]
     },
     "metadata": {"tags":[]},
     "execution_count": 21
    }
   ]
  },
  {
   "cell_type": "code",
   "metadata": {"id":"9siuD1S5_96l","colab_type":"code","colab":{}},
   "source": [
    "# Persist the names of the files that could not be analysed.\n",
    "with open('failed_file.json', 'w') as f:\n",
    "    json.dump(failed_list, f, indent = 4)"
   ],
   "execution_count": null,
   "outputs": []
  }
 ]
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment