Skip to content

Instantly share code, notes, and snippets.

@akhileshravi
Last active December 26, 2019 19:33
Show Gist options
  • Save akhileshravi/d032d227aa5a553fadccd4679b74a0de to your computer and use it in GitHub Desktop.
Save akhileshravi/d032d227aa5a553fadccd4679b74a0de to your computer and use it in GitHub Desktop.
Assignment1_NLP_16110007
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# ASSIGNMENT 1\n",
"NLP\n",
"Akhilesh Ravi\n",
"16110007"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import re\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"from bs4 import BeautifulSoup"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"data = pd.read_csv('tweets-dataset.csv')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sentence</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>@BubblyDentist @MeetUunngLee nahi nahi, mere s...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>going to the grammys first entertainment law b...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>bohot hi badiya ji aap sunao?</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Parvez Musharraf is Digvijay Singh of Pakistan...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>guddu ko bass john cena k sticker ki padii hai...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Aman ki maa ki... Asha https://twitter.com/ash...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>pakistan can wait more more and more . . . ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>@sagarcasm Jai Mahesh !!</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>RT AAP_Najafgarh \" RT AAPInNews : When DDCA lo...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>\"Kaam ho jayega, thoda kharcha paani lagega\" \\...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Sentence\n",
"0 @BubblyDentist @MeetUunngLee nahi nahi, mere s...\n",
"1 going to the grammys first entertainment law b...\n",
"2 bohot hi badiya ji aap sunao?\n",
"3 Parvez Musharraf is Digvijay Singh of Pakistan...\n",
"4 guddu ko bass john cena k sticker ki padii hai...\n",
"5 Aman ki maa ki... Asha https://twitter.com/ash...\n",
"6 pakistan can wait more more and more . . . ...\n",
"7 @sagarcasm Jai Mahesh !!\n",
"8 RT AAP_Najafgarh \" RT AAPInNews : When DDCA lo...\n",
"9 \"Kaam ho jayega, thoda kharcha paani lagega\" \\..."
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[:10]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# s = re.sub('[^0-9a-zA-Z]+', '*', s)\n",
"# https://stackoverflow.com/questions/12985456/replace-all-non-alphanumeric-characters-in-a-string"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"def cleanText(raw_text):\n",
" '''\n",
" Convert a raw review to a cleaned review\n",
" '''\n",
" text = BeautifulSoup(raw_text, 'html').get_text() #remove html\n",
" words = text.split()\n",
" words = [w for w in words if '@' not in w and '#' not in w] # remove the @-words and #-words\n",
" text = ' '.join(words)\n",
" letters_only = re.sub('[^a-zA-Z]+', ' ', text) # remove non-character\n",
" \n",
" return( letters_only.lower())\n",
"\n",
"vclean = np.vectorize(cleanText)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sentence</th>\n",
" <th>Cleaned sentence</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>@BubblyDentist @MeetUunngLee nahi nahi, mere s...</td>\n",
" <td>nahi nahi mere saath jaakar pachtaogi ye uunng...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>going to the grammys first entertainment law b...</td>\n",
" <td>going to the grammys first entertainment law b...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>bohot hi badiya ji aap sunao?</td>\n",
" <td>bohot hi badiya ji aap sunao</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Parvez Musharraf is Digvijay Singh of Pakistan...</td>\n",
" <td>parvez musharraf is digvijay singh of pakistan...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>guddu ko bass john cena k sticker ki padii hai...</td>\n",
" <td>guddu ko bass john cena k sticker ki padii hai...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Aman ki maa ki... Asha https://twitter.com/ash...</td>\n",
" <td>aman ki maa ki asha https twitter com ashabhos...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>pakistan can wait more more and more . . . ...</td>\n",
" <td>pakistan can wait more more and more aakhir pa...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>@sagarcasm Jai Mahesh !!</td>\n",
" <td>jai mahesh</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>RT AAP_Najafgarh \" RT AAPInNews : When DDCA lo...</td>\n",
" <td>rt aap najafgarh rt aapinnews when ddca lowere...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>\"Kaam ho jayega, thoda kharcha paani lagega\" \\...</td>\n",
" <td>kaam ho jayega thoda kharcha paani lagega sir...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Sentence \\\n",
"0 @BubblyDentist @MeetUunngLee nahi nahi, mere s... \n",
"1 going to the grammys first entertainment law b... \n",
"2 bohot hi badiya ji aap sunao? \n",
"3 Parvez Musharraf is Digvijay Singh of Pakistan... \n",
"4 guddu ko bass john cena k sticker ki padii hai... \n",
"5 Aman ki maa ki... Asha https://twitter.com/ash... \n",
"6 pakistan can wait more more and more . . . ... \n",
"7 @sagarcasm Jai Mahesh !! \n",
"8 RT AAP_Najafgarh \" RT AAPInNews : When DDCA lo... \n",
"9 \"Kaam ho jayega, thoda kharcha paani lagega\" \\... \n",
"\n",
" Cleaned sentence \n",
"0 nahi nahi mere saath jaakar pachtaogi ye uunng... \n",
"1 going to the grammys first entertainment law b... \n",
"2 bohot hi badiya ji aap sunao \n",
"3 parvez musharraf is digvijay singh of pakistan... \n",
"4 guddu ko bass john cena k sticker ki padii hai... \n",
"5 aman ki maa ki asha https twitter com ashabhos... \n",
"6 pakistan can wait more more and more aakhir pa... \n",
"7 jai mahesh \n",
"8 rt aap najafgarh rt aapinnews when ddca lowere... \n",
"9 kaam ho jayega thoda kharcha paani lagega sir... "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['Cleaned sentence'] = vclean(data['Sentence'])\n",
"data[:10]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 nahi nahi mere saath jaakar pachtaogi ye uunng...\n",
"1 going to the grammys first entertainment law b...\n",
"2 bohot hi badiya ji aap sunao \n",
"3 parvez musharraf is digvijay singh of pakistan...\n",
"4 guddu ko bass john cena k sticker ki padii hai...\n",
"5 aman ki maa ki asha https twitter com ashabhos...\n",
"6 pakistan can wait more more and more aakhir pa...\n",
"7 jai mahesh \n",
"8 rt aap najafgarh rt aapinnews when ddca lowere...\n",
"9 kaam ho jayega thoda kharcha paani lagega sir...\n",
"Name: Cleaned sentence, dtype: object"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[:10]['Cleaned sentence']"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"def tokenize(s):\n",
" return tuple(s.split())"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of tokens: 303161\n",
"Number of word types: 32707\n"
]
}
],
"source": [
"tokens = []\n",
"for i in range(len(data)):\n",
" tokens.extend( tokenize( data.iloc[i]['Cleaned sentence']) )\n",
"sorted_tokens = sorted(tokens)\n",
"word_types = list(set(tokens))\n",
"print('Number of tokens:', len(tokens))\n",
"print('Number of word types:', len(word_types))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TTR: 0.10788656852299604\n"
]
}
],
"source": [
"print('TTR:', len(word_types)/len(tokens))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 2. Zipf's Law"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"token_count = {}\n",
"for s in sorted_tokens:\n",
" if s in token_count:\n",
" token_count[s] += 1\n",
" else:\n",
" token_count[s] = 1"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Top 50 Word types in decreasing order of number of occurences:\n",
"\n"
]
},
{
"data": {
"text/plain": [
"[('hai', 10030),\n",
" ('to', 4154),\n",
" ('ki', 3224),\n",
" ('ke', 3170),\n",
" ('nahi', 3169),\n",
" ('bhi', 2929),\n",
" ('the', 2866),\n",
" ('se', 2601),\n",
" ('ho', 2365),\n",
" ('ka', 2310),\n",
" ('bhai', 2266),\n",
" ('ko', 2208),\n",
" ('me', 1955),\n",
" ('ye', 1869),\n",
" ('kya', 1815),\n",
" ('hi', 1801),\n",
" ('aur', 1797),\n",
" ('twitter', 1760),\n",
" ('com', 1724),\n",
" ('kar', 1681),\n",
" ('i', 1509),\n",
" ('in', 1387),\n",
" ('t', 1319),\n",
" ('https', 1310),\n",
" ('is', 1296),\n",
" ('mein', 1276),\n",
" ('a', 1202),\n",
" ('ek', 1165),\n",
" ('and', 1126),\n",
" ('status', 1108),\n",
" ('of', 1074),\n",
" ('on', 1071),\n",
" ('na', 1026),\n",
" ('s', 1009),\n",
" ('ab', 969),\n",
" ('toh', 963),\n",
" ('rt', 944),\n",
" ('tha', 937),\n",
" ('http', 905),\n",
" ('for', 885),\n",
" ('you', 885),\n",
" ('aaj', 873),\n",
" ('co', 872),\n",
" ('raha', 868),\n",
" ('par', 826),\n",
" ('ne', 824),\n",
" ('aap', 820),\n",
" ('hain', 816),\n",
" ('koi', 802),\n",
" ('kuch', 801),\n",
" ('liye', 780),\n",
" ('k', 754),\n",
" ('tu', 748),\n",
" ('ji', 747),\n",
" ('it', 704),\n",
" ('p', 702),\n",
" ('sir', 691),\n",
" ('d', 690),\n",
" ('do', 684),\n",
" ('pe', 667),\n",
" ('main', 665),\n",
" ('mujhe', 643),\n",
" ('gaya', 631),\n",
" ('rahe', 621),\n",
" ('h', 619),\n",
" ('baat', 600),\n",
" ('be', 598),\n",
" ('sab', 586),\n",
" ('with', 570),\n",
" ('at', 566),\n",
" ('he', 557),\n",
" ('aa', 554),\n",
" ('de', 536),\n",
" ('url', 532),\n",
" ('jo', 531),\n",
" ('yaar', 530),\n",
" ('kiya', 508),\n",
" ('hum', 499),\n",
" ('hota', 487),\n",
" ('le', 479),\n",
" ('tum', 475),\n",
" ('mere', 468),\n",
" ('this', 454),\n",
" ('diya', 452),\n",
" ('modi', 449),\n",
" ('log', 448),\n",
" ('ya', 425),\n",
" ('my', 424),\n",
" ('tomorrow', 424),\n",
" ('that', 419),\n",
" ('gaye', 415),\n",
" ('bas', 413),\n",
" ('din', 412),\n",
" ('hu', 410),\n",
" ('kabhi', 410),\n",
" ('abhi', 405),\n",
" ('u', 394),\n",
" ('india', 388),\n",
" ('day', 387),\n",
" ('time', 385)]"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sorted_token_count = list(token_count.items())\n",
"sorted_token_count.sort(key=lambda x:x[1], reverse=True)\n",
"print('Top 50 Word types in decreasing order of number of occurences:\\n')\n",
"sorted_token_count[:100]"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0.5, 1.0, \"Zipf's Law\")"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZIAAAEWCAYAAABMoxE0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3deZhcdZ3v8fcnTQeazWbpyUAHSMTcMCBCsGURFxYliCJ5GFBcxoA43OfKdZ8oudd5wqKCExVQR53IrgyrERHQGAGRywyBxLAFCGRQSJpAIiQgmsEs3/vH+RWp7nR3ne7T1dWn+/N6nnqqzq/OqfM9RZFv/85vU0RgZmY2UGMaHYCZmZWbE4mZmRXiRGJmZoU4kZiZWSFOJGZmVogTiZmZFeJEYjYAkn4haXrOfcdJ+q2kP0n6Zr1jMxtqWzU6ALPhRtJHgH/r4a3tgFkRcW5EvKcfH3kG8Edgx4gISWcDRMTZ/YwrgEkRsaw/x5nVm2skZt1ExNURsX31A/gs8DzwwwF85F7Ao+HRvzZCOZGY1SBpCnARcEpErExlv5H0ifT6VEn3SPqupJckPS7p6PTeFcB04IuSXpH0rm6fvaukWyStlfSipLsl9ev/S0l7S7pD0guS/ijpakmt6b3TJP28at8nJd1Qtb1c0oED+mLMEt/aMutD+gf5RuC8iPhNH7sekvbbFTgRmCtpYkScKglgRUR8Oe3766rjvgCsANrS9qFAf2suAs4HfgvsCPwEOJusFnUXcGFKTn8LjAUOS9f2emB74KF+ns+sC9dIzHqhLANcBTwC/EuN3VcBF0XE+oi4DlgKvDfHadYDuwF7pWPv7u8tsIhYFhHzI+LViFgNfAt4Z3rvKeBPwIHAO4B5wLOS9kn73B0Rm/pzPrPuXCMx692XgP2AN+f4x72z2z5PA7vnOMdsstrDr1LNZU5EXNCfICWNAy4G3g7sQPYH4pqqXe4CjgDekF6vJUsih6Vts0JcIzHrgaQjgP8LnBQRa3Mc0p5qMBV7As/WOigi/hQRX4iI1wPvBz5faV/ph6+R3Q7bPyJ2BD5KdruropJI3p5e30WWSN6JE4kNAicSs24k7QZcC3w2IhbnPOxvgE9LapZ0MvB3wG05zvU+SW9ISeglYCPQ162msZK2qXo0kdVCXgFektQOzOh2zF3AkUBLRKwA7gaOBXYB8l6fWa+cSMy29I/AOODi1NOq+vGDXo5ZAEwiGy/yVbKazAs5zjWJrPH9FeA/ge9FxJ197L8EWFf1OA04BziILBHdCsytPiAinkiff3fafhl4CrgnIjbmiNGsT3LXdrNiJJ0KfCIi3tboWMwawTUSMzMrxInEzMwK8a0tMzMrxDUSMzMrZNQNSNx1111jwoQJjQ7DzKxUFi1a9MeIaOvpvVGXSCZMmMDChQsbHYaZWalIerq393xry8zMCnEiMTOzQpxIzMysECcSMzMrxInEzMwKGXW9turppsWdzJ63lGfXrmP31hZmTJ3MtCntjQ7LzKyu6lojkXSZpFWSHqkq21nS/LR29HxJO6VySfq2pGWSHpJ0UNUx09P+T0qaXlX+ZkkPp2O+3W09iCF10+JOZs59mM616wigc+06Zs59mJsWdzYqJDOzIVHvW1tXkK17UO0s4PaImATcnrYB3kM2pfYk4Azg+5AlHmAW2ZrYBwOzKskn7fOPVcd1P9eQmT1vKevWd52Re936jcyet7RBEZmZDY26JpKI+C3wYrfiE4Ar0+srgWlV5VdF5l6gNS0wNBWYHxEvRsQaYD5wbHpvx4i4Ny1xelXVZw25Z9eu61e5mdlI0YjG9nERsTK9fo5sASGAdmB51X4rUllf5St6KN+CpDMkLZS0cPXq1cWvoAe7t7b0q9zMbKRoaK+tVJOo+/TDETEnIjoioqOtrcepYgqbMXUyLc1NXcpampuYMXVyXc5nZjZcNCKRPJ9uS1XWxl6VyjuBPar2G5/K+iof30N5Q0yb0s75J+5Pe2sLAtpbWzj/xP3da8vMRrxGdP+9GZgOXJCef1ZV/r8lXUvWsP5SRKyUNA/4WlUD+zHAzIh4UdLLkg4lWy/7Y8B3hvJCups2pd2Jw8xGnbomEknXAEcAu0paQdb76gLgekmnA08DH0i73wYcBywD/gKcBpASxnnA/Wm/cyOi0oD/SbKeYS3AL9LDzMyG0KhbIbGjoyM8jbyZWf9IWhQRHT295ylSzMysECcSMzMrxInEzMwKcSIxM7NCnEjMzKwQJxIzMyvEicTMzApxIjEzs0KcSMzMrBAnEjMzK8SJxMzMCnEiMTOzQpxIzMysECcSMzMrxInEzMwKcSIxM7NCnEjMzKwQJxIzMyvEicTMzApxIjEzs0KcSMzMrBAnEjMzK8SJxMzMCnEiMTOzQpxIzMysECcSMzMrxInEzMwKcSIxM7NCnEjMzKwQJxIzMyukYYlE0uckLZH0iKRrJG0jaaKkBZKWSbpO0ti079Zpe1l6f0LV58xM5UslTW3U9ZiZjVYNSSSS2oFPAx0R8UagCTgF+DpwYUS8AVgDnJ4OOR1Yk8ovTPshad903H7AscD3JDUN5bWYmY12jby1tRXQImkrYFtgJXAUcGN6/0pgWnp9QtomvX+0JKXyayPi1Yj4PbAMOHiI4jczMxqUSCKiE/gG8AxZAnkJWASsjYgNabcVQHt63Q4sT8duSPvvUl3ewzGvkXSGpIWSFq5evXrwL8jMbBRr1K2tnchqExOB3YHtyG5N1UVEzImIjojoaGtrq9dpzMxGpUbd2noX8PuIWB0R64G5wOFAa7rVBTAe6EyvO4E9ANL7rwNeqC7v4RgzMxsCjUokzwCHSto2tXUcDTwK3AmclPaZDvwsvb45bZPevyMiIpWfknp1TQQmAfcN0TWYmRlZg/eQi4gFkm4EfgdsABYDc4BbgWslfSWVXZoOuRT4kaRlwItkPbWIiCWSridLQhuAMyNi45BejJnZKKfsD/vRo6OjIxYuXNjoMMzMSkXSoojo6Ok9j2w3M7NCnEjMzKwQJxIzMyvEicTMzApxIjEzs0KcSMzMrBAnEjMzK8SJxMzMCnEiMTOzQpxIzMysECcSMzMrxInEzMwKcSIxM7NCnEjMzKyQXOuRSDocOBvYKx0jICLi9fULzczMyiDvwlaXAp8DFgFeOMrMzF6TN5G8FBG/qGskZmZWSnkTyZ2SZgNzgVcrhRHxu7pEZWZmpZE3kRySnquXWQzgqMENx8zMyiZXIomII+sdiJmZlVOu7r+Sxkm6VNIv0va+kk6vb2hmZlYGeceRXAHMA3ZP208An61HQGZmVi55E8muEXE9sAkgIjbgbsBmZkb+RPJnSbuQNbAj6VDgpbpFZWZmpZG319bngZuBvSXdA7QBJ9UtKjMzK428iWQx8E5gMtn0KEvxPF1mZkb+ZHBpRGyIiCUR8QgwFritjnGZmVlJ5E0knZK+ByBpJ2A+8OO6RWVmZqWRK5FExD8Dr0j6AfAr4JsRcXldIzMzs1Los41E0olVmwuAfwbuA0LSiRExt57BmZnZ8Fersf34btuLgeZUHmSTOJqZ2SjWZyKJiNPqdWJJrcAlwBvJktLHyXqDXQdMAP4AfCAi1kgScDFwHPAX4NTKzMOSpgNfTh/7lYi4sl4xm5nZlvLOtTVe0k8lrUqPn0gaX/DcFwO/jIh9gAOAx4CzgNsjYhJwe9oGeA8wKT3OAL6f4toZmEU2O/HBwKzUGcDMzIZI3l5bl5MNSNw9PX6eygZE0uuAd5CtvEhE/DUi1gInAJUaxZXAtPT6BOCqyNwLtEraDZgKzI+IFyNiDVlvsmMHGpeZmfVf3kTSFhGXp7EkGyLiCrLR7QM1EVgNXC5psaRLJG0HjIuIlWmf54Bx6XU7sLzq+BWprLfyLiSdIWmhpIWrV68uELaZmXWXN5G8IOmjkprS46PACwXOuxVwEPD9iJgC/JnNt7EAiIggze1VVETMiYiOiOhoayuS/8zMrLu8ieTjwAfIagkryebZOrXAeVcAKyJiQdq+kSyxPJ9uWZGeV6X3O4E9qo4fn8p6KzczsyGSN5GMj4j3R0RbRPxNREwD9hzoSSPiOWC5pMmp6GjgUbJ2mOmpbDrws/T6ZuBjyhwKvJRugc0DjpG0U2pkPyaVmZnZEMk7aeN3yGoMtcr641PA1ZLGAk8Bp5EltuvT6otPk9WCIJvX6zhgGVn339MAIuJFSecB96f9zo2IFwvEZGZm/VRrZPthwFuBNkmfr3prR6CpyIkj4gGgo4e3ju5h3wDO7OVzLgMuKxKLmZkNXK0ayVhg+7TfDlXlL+P1SMzMjNoj2+8C7pJ0RUQ8PUQxmZlZieSd/ddJxMzMeuRVDs3MrJA+E4mkr6fnk4cmHDMzK5taNZLj0sy7M4ciGDMzK59avbZ+CawBtpf0MiCyaUtE1it3xzrHZ2Zmw1yfNZKImBERrcCtEbFjROxQ/TxEMZqZ2TCWa2R7RJwgaRzwllS0ICI8ja6ZmeVe2OpksrXaTyabtuQ+SR6QaGZmuefa+jLwlohYBSCpDfg12ay9ZmY2iuUdRzKmkkSSF/pxrJmZjWB5ayS/lDQPuCZtf5BsRl4zMxvl8ja2z5B0IvC2VDQnIn5av7DMzKws8tZIiIi5wNw6xmJmZiXkdg4zMyvEicTMzArJO47kzT2UvW/wwzEzs7LJWyP5oaQ3VjYkfQj45/qEZGZmZZK3sf0k4EZJHwbeDnwMOKZuUZmZWWnk7f77lKRTgJuAZ4BjImJdXSMzM7NS6DORSHqYbNr4ip2BJmCBJCLiTfUMzszMhr9aNRI3qJuZWZ/6TCQR8XTltaQmYFytY8zMbHTJlRQkfQqYBTwPbErFAfjWlpnZKJe3dvEZYHJEvFDPYMzMrHzyjiNZDrxUz0DMzKyc8tZIngJ+I+lW4NVKYUR8qy5RlcBNizuZPW8pz65dx+6tLcyYOplpU9obHZaZ2ZDLm0ieSY+x6TGq3bS4k5lzH2bd+o0AdK5dx8y5DwM4mZjZqJN3QOI59Q6kTGbPW/paEqlYt34js+ctdSIxs1En76SNbZJmS7pN0h2VR9GTS2qStFjSLWl7oqQFkpZJuk7S2FS+ddpelt6fUPUZM1P5UklTi8aUx7Nrex7U31u5mdlIlrex/WrgcWAicA7wB+D+QTj/Z4DHqra/DlwYEW8A1gCnp/LTgTWp/MK0H5L2BU4B9gOOBb6XxrvU1e6tLf0qNzMbyfImkl0i4lJgfUTcFREfB44qcmJJ44H3ApekbaXPvDHtciUwLb0+IW2T3j867X8CcG1EvBoRvweWAQcXiSuPGVMn09LcNV+1NDcxY+rkep/azGzYydvYvj49r5T0XuBZsnm3irgI+CKwQ9reBVgbERvS9gqg0uDQTtYFmYjYIOmltH87cG/VZ1Yf8xpJZwBnAOy5554Fw97coO5eW2Zm+RPJVyS9DvgC8B1gR+BzAz1pWhRrVUQsknTEQD8nr4iYA8wB6OjoiBq75zJtSrsTh5kZORJJanOYFBG3kA1KPHIQzns48H5JxwHbkCWmi4FWSVulWsl4oDPt3wnsAayQtBXwOuCFqvKK6mPMzGwI1GwjiYiNwIcG86QRMTMixkfEBLLG8jsi4iPAnWSLaAFMB36WXt+ctknv3xERkcpPSb26JgKTgPsGM1YzM+tb3ltb90j6LnAd8OdKYUT8bpDj+RJwraSvAIuBS1P5pcCPJC0DXiRLPkTEEknXA48CG4AzU+IzM7MhouwP+xo7SXf2UBwRUajnViN0dHTEwoULGx2GmVmpSFoUER09vZd3ZPtgtIuYmdkIlHcciZmZWY+cSMzMrBAnEjMzK6TfiUTSnHoEYmZm5TSQGkmPrfZmZjY65R1HUm3VoEdRQl4h0cws0+9EEhHH1iOQMrlpcSczbniQ9ZuyMTida9cx44YHgS1XSHTCMbORzo3tA3D2zUteSyIV6zcFZ9+8pEtZZUnezrXrCDYvyXvTYk8HZmYjhxPJAKxdtz5XeV9L8pqZjRQ1E0laDnfAU8aPZl6S18xGg4bM/lt2O23bnKvcS/Ka2Wgw3Gb/LYVZx+/HjBsfZP3Gze0kzU3ivW/ajcMvuOO1hvUj92njJ4s6u9ze8pK8ZjbS5E0kB6bnc6vKgoLrtpdVT0vtHrlPG9fdt7xLT67r7lvOBw/egzsfX+1eW2Y2YuWaRn4kqdc08gee86seG+FbW5p5YNYxg34+M7OhNOBp5CXtmfMcayPi5X5HNoLk7cllZjbS1Lq1dSXZLSz1sU8AVwBXDVJMZmZWIn0mEi9old9O2zaz5i9b1j566+FlZjZSeEDiIJl1/H40N3WtuDU3iVnH79egiMzMhoYTyQDdtLiTwy+4g4ln3crhF9wBwOyTDqC9tQUB7a0tzD7pAPfQMrMRbyCz/456lTm0KuNDKnNonX/i/txz1qjsEW1mo5hrJAPgObTMzDZzIhkAz6FlZraZE8kAeA4tM7PNnEgG4Mh92vpVbmY2kjmRDMAtD67MXd69d5cXtTKzkca9tgYg73QovfXugi2X5DUzKyvXSOrIvbvMbDRwIhmAMb3MPNa93L27zGw0cCIZgE29zLzfvdy9u8xsNGhIIpG0h6Q7JT0qaYmkz6TynSXNl/Rket4plUvStyUtk/SQpIOqPmt62v9JSdOHIv72XhJB9/IZUyfT0tzUpcwrJJrZSNOoGskG4AsRsS9wKHCmpH2Bs4DbI2IScHvaBngPMCk9zgC+D1niAWYBhwAHA7MqyaeeZkydTHO3+1jNY7RFgpg2pZ3zT9y/y/xb55+4vxvazWxEaUivrYhYCaxMr/8k6TGgHTgBOCLtdiXwG+BLqfyqyJZzvFdSq6Td0r7zI+JFAEnzgWOBa+p+Ed3bSXppN5k2pd2Jw8xGtIa3kUiaAEwBFgDjUpIBeA4Yl163A8urDluRynorr6vZ85ayfmPXBpH1G8O9scxsVGroOBJJ2wM/AT4bES9Lm/+sj4iQNCgLyks6g+yWGHvumXf14N519tLrqnPtOg6/4A6eXbuO3VtbmDF1smsjZjbiNaxGIqmZLIlcHRFzU/Hz6ZYV6XlVKu8E9qg6fHwq6628i4iYExEdEdHR1lZ8GpO+1h3uXLuOSM8zbnhwwCPZPSLezMqiUb22BFwKPBYR36p662ag0vNqOvCzqvKPpd5bhwIvpVtg84BjJO2UGtmPSWV1lbeatH5TcPbNS/r9+ZUR8dVJaebch51MzGxYalSN5HDgH4CjJD2QHscBFwDvlvQk8K60DXAb8BSwDPgh8EmA1Mh+HnB/epxbaXgfLnqbTqUvHhFvZmXSqF5b/4/e7xAd3cP+AZzZy2ddBlw2eNHVttO2zaz5S/8TRF4eEW9mZdLwXltlNOv4/Whu6qulpBiPiDezMnEiyaF7wzfA7JMO6DLQcDB5RLyZlYmnka+ht6ngzz9xf+4566jX9pty7q96vN2107bN/T5npcvw7HlL3ZXYzIY9J5Ia+mr4rv6Hfdbx+zHjxge7DFRsbhKzjt9vQOf1iHgzKwvf2qqhtwbuyuDDvm53zT7pACcDMxvxXCOpYffWlj5HsleeZ9zwILNPPqDL7a7e3LS407etzGzEcCKp4ch92vjxvc/U3K8y+LB7QuieNI7cp42fLOr08rtmNmL41lYNdz6+Ove+va3ZXj1C/ep7n/FgQzMbUZxIaujttlYePTXU9za9igcbmllZ+dZWDU0SGyPf7FrNY+gy+29/ktAYiYln3eo2EzMrHddIasibRADWb+o6+29vY997Kt8Y4QkazayUnEhqKDJqPdgyabQ0N/GRQ/d8rZtwk7ZMK24zMbMy8a2tGvL22upNkCWj3rr6Tjzr1h6Pc5uJmZWFayQ19KfXVk+aa3zDnqDRzMrOiaSGojWD7u0m3VdN9ASNZlZ2TiQ1DHbNoPuqidOmtHP+ift3mVrl/BP3d68tMysNt5HUMGPqZGbc8CDrN+XvvVVL94GLnqDRzMrMNZIcBjOJmJmNNK6R1DBz7kN1+dzqgYsegGhmZeZEUsO69Zvq8rnVMwd70kYzKzPf2hoGPADRzMrMiWSY8ABEMysrJ5JhwgMQzays3EZSQ3s/Z/EdiDGQawCiV1Y0s+HINZIaJuxS/5rCJmDh0y/2uU9Pi2R5lmAzGw5cI6nh3qfWDMl5fnzvM1x97zOv1TSALrWPP7+6odeVFV0rMbNGciKpoT/rkRT12nxcNz4IsXkgZF+31txIb2aN5kRSQ39WSBws6zfmP58b6c2s0ZxIajj09Ttxz3/13X7RSIPZhuPGfDMbCCeSGoZzEgH4j0GKr9KYX2mH8Yh7M8tLMcS3bRqto6MjFi5cmHv/Cb2sYDicjdthLM//6a9dtmcet2+X2saR+7Rx5+OruzTmd5+VGKC1pZnttt6qSy0FcM3FbJSRtCgiOnp8r+yJRNKxwMVAE3BJRFzQ1/6jIZGUxbgdxvLu/f6WaxYsZ2METRIfOmQPbl7cycuvbu6htuPWTew//nVdaoeH770zE9u23+LYjr12rmuS6+n2H5QnsZY9/kYYCbd8B+MaRmwikdQEPAG8G1gB3A98KCIe7e0YJ5KRbYygetb/luamQVsorPvtP4DmMQJ17SAxmOccTGWPvxF6+s7K9v0M1jX0lUjKPiDxYGBZRDwVEX8FrgVOaHBM1kDdl44ZzAkxZ89busVYnvWbYotedsN1Es6yx98IPX1nZft+huIayp5I2oHlVdsrUlkXks6QtFDSwtWrVw9ZcDY8DNZYm/58znAc31P2+Buht++hTN/PUFxD2RNJLhExJyI6IqKjra2tX8fuuHVTnaKyoTJYY2368znDcXxP2eNvhN6+hzJ9P0NxDWVPJJ3AHlXb41PZoHnonGOdTEpkjLputzQ35ZoQM48ZUyfT0tz1t9A8RjQ3dT3pYJ5zMJU9/kbo6Tsr2/czFNdQ9nEk9wOTJE0kSyCnAB8e7JM8dM6xW5S5Eb64svXaqnxOWXs9lT3+RujtOyvT9zMU11DqXlsAko4DLiLr/ntZRHy1r/3722vLzMz67rVV9hoJEXEbcFuj4zAzG63K3kZiZmYN5kRiZmaFOJGYmVkhTiRmZlZI6Xtt9Zek1cDTOXffFfhjHcOpN8ffWGWPH8p/DY5/8OwVET2O6B51iaQ/JC3srbtbGTj+xip7/FD+a3D8Q8O3tszMrBAnEjMzK8SJpG9zGh1AQY6/scoeP5T/Ghz/EHAbiZmZFeIaiZmZFeJEYmZmhTiR9EDSsZKWSlom6axGx5OHpMskrZL0SFXZzpLmS3oyPe/UyBj7ImkPSXdKelTSEkmfSeWluAZJ20i6T9KDKf5zUvlESQvSb+k6SWMbHWtfJDVJWizplrRdmvgl/UHSw5IekLQwlZXi91MhqVXSjZIel/SYpMPKcA1OJN1IagL+FXgPsC/wIUn7NjaqXK4Aui+cchZwe0RMAm5P28PVBuALEbEvcChwZvrey3INrwJHRcQBwIHAsZIOBb4OXBgRbwDWAKc3MMY8PgM8VrVdtviPjIgDq8ZelOX3U3Ex8MuI2Ac4gOy/xfC/hojwo+oBHAbMq9qeCcxsdFw5Y58APFK1vRTYLb3eDVja6Bj7cS0/A95dxmsAtgV+BxxCNip5q1Te5bc13B5kK4zeDhwF3AKoZPH/Adi1W1lpfj/A64DfkzpBlekaXCPZUjuwvGp7RSoro3ERsTK9fg4Y18hg8pI0AZgCLKBE15BuCz0ArALmA/8FrI2IDWmX4f5bugj4IrApbe9CueIP4FeSFkk6I5WV5vcDTARWA5en24uXSNqOElyDE8koEdmfM8O+r7ek7YGfAJ+NiJer3xvu1xARGyPiQLK/7A8G9mlwSLlJeh+wKiIWNTqWAt4WEQeR3ZY+U9I7qt8c7r8fsoUGDwK+HxFTgD/T7TbWcL0GJ5ItdQJ7VG2PT2Vl9Lyk3QDS86oGx9MnSc1kSeTqiJibikt1DQARsRa4k+xWUKukykqkw/m3dDjwfkl/AK4lu711MeWJn4joTM+rgJ+SJfMy/X5WACsiYkHavpEssQz7a3Ai2dL9wKTUW2UscApwc4NjGqibgenp9XSydodhSZKAS4HHIuJbVW+V4hoktUlqTa9byNp3HiNLKCel3YZt/BExMyLGR8QEst/8HRHxEUoSv6TtJO1QeQ0cAzxCSX4/ABHxHLBc0uRUdDTwKCW4Bo9s74Gk48juFzcBl0XEVxscUk2SrgGOIJt2+nlgFnATcD2wJ9nU+R+IiBcbFWNfJL0NuBt4mM336P8PWTvJsL8GSW8CriT7zYwBro+IcyW9nuwv/J2BxcBHI+LVxkVam6QjgH+KiPeVJf4U50/T5lbAv0fEVyXtQgl+PxWSDgQuAcYCTwGnkX5PDONrcCIxM7NCfGvLzMwKcSIxM7NCnEjMzKwQJxIzMyvEicTMzApxIrHSkfTpNDPq1f045mxJnWlm2AckXVDPGIcLSW9PsxE/kMa3VMpbJX0yx/FHVGYCNuvNVrV3MRt2Pgm8KyJW9PO4CyPiG729KakpIjYWC23Y+QhwfkT8uFt5K9n3+L2hD8lGGtdIrFQk/QB4PfALSZ8bhM/7g6SvS/odcLKkvSX9Mk38d7ekfdJ+EyX9Z1rv4iuSXknlXf5il/RdSaem12+WdFf6rHlV01z8Jp3zPklPSHp7Km+S9A1Jj0h6SNKnJB0l6aaqz3+3pJ/SjaSj00R/Dytbm2ZrSZ8APgCc10Pt7QJg71RTma3M7HTuhyV9sIdzvCWdY+80kvyydA2LJZ2Q9jlV0tz0HT4p6V+K/Pexkmj09MN++NHfBz1MF57jmLPJ5ol6ID2mVn3WF6v2ux2YlF4fQjZVCGTTVHwsvT4TeCW9PgK4per47wKnAs3AfwBtqfyDZLMkAPwG+GZ6fRzw6/T6f5HNr1SZtn1nsqncH6/6nH8Hju92bduQzVj9P9L2VWSTXkK2Ts1JPXwfE+i65MDfk81Y3EQ2u+wzZFOWH0E2pfxbgUXAnmn/r5GNcoesdvMEsF269qfIpkTfhmwk9h6N/s34Ud+Hb23ZaNLbra3r4LWZh98K3JBN/QXA1un5cLJ/bAF+RLbgU18mA28E5lkIpHAAAAInSURBVKfPagJWVr1fmZRyEdk/6gDvAn4Qadr2SNNgSPoR8FFJl5NNBPmxHs71+4h4Im1fSZbsLqoRY7W3AddEdmvveUl3AW8BXgb+DpgDHBMRz6b9jyGb5PGf0vY2ZFN4QLYI00sp9keBvei6NIONME4kNmKkf2inAM9GxHH9OPTP6XkM2fobB/ayX0/zCW2g6y3ibSrhAEsi4rBePqsyX9VGav9/eDnwc+C/gRti8/ogQ2Ul2XVNASqJRMDfR8TS6h0lHcLma4N812cl5zYSGzEi4rTIllntTxKpPv5l4PeSToZsRmJJB6S37yGbFReyBuyKp4F9U5tEK9mMrZCtatcm6bD0Wc2S9qsRwnzgfypN2y5p5xTXs2T/gH+ZLKl0txSYIOkNafsfgLtqnOtPwA5V23cDH0ztNG3AO4D70ntrgfcC56cJHQHmAZ9Sqm5JmlLjfDaCOZGYdfUR4HRJDwJLgBNS+WfIFkt6mKpVAiNiOdnMrI+k58Wp/K9k069/PX3WA2S3zfpyCVnbxEPpmA9XvXc1sDwiHut+UET8N9kssTek+DYBP+jrRBHxAnBPalyfTTZz7kPAg8AdZO1Gz1Xt/zzwPuBfU63jPLJ2oIckLUnbNkp59l+zAZD0SkRsP4Tn+y6wOCIuHapzmuXlRGI2AEOZSCQtImvHeXcMw7VAzJxIzMysELeRmJlZIU4kZmZWiBOJmZkV4kRiZmaFOJGYmVkh/x9AZvdC6btnfwAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"length = [len(item[0]) for item in sorted_token_count]\n",
"frequency = [item[1] for item in sorted_token_count]\n",
"plt.scatter(length, frequency)\n",
"\n",
"plt.xlabel('f - Frequency of token')\n",
"plt.ylabel('|r - rank of token')\n",
"plt.title('Zipf\\'s Law')"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"from random import choice\n",
"from nltk.corpus import wordnet"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"def num_meanings(word):\n",
" synonyms = []\n",
" antonyms = []\n",
"\n",
" for syn in wordnet.synsets(word):\n",
" for l in syn.lemmas():\n",
" synonyms.append(l.name())\n",
" if l.antonyms():\n",
" antonyms.append(l.antonyms()[0].name())\n",
"\n",
" return len(list(set(synonyms)))"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('twitter', 1760),\n",
" ('hi', 1801),\n",
" ('he', 557),\n",
" ('at', 566),\n",
" ('sir', 691),\n",
" ('me', 1955),\n",
" ('log', 448),\n",
" ('of', 1074),\n",
" ('india', 388),\n",
" ('time', 385)]"
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"choose = sorted_token_count[:100]\n",
"l1 = ['twitter', 'hi', 'he', 'at', 'sir', 'me','log', 'of', 'india','time']\n",
"lst = []\n",
"for k in l1:\n",
" item = (k, token_count[k])\n",
" lst.append(item)\n",
"# lst.sort(key=lambda x:x[1], reverse=True)\n",
"lst"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"num_meanings('hello')"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[3, 9, 4, 4, 2, 3, 3, 0, 3, 9]"
]
},
"execution_count": 81,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEWCAYAAACXGLsWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3df5xWZZ3/8ddbQB0rHZWJlQECjTDMCp3Ioi3TVtR1ha/rluVuaBa7ZT/MloK2x/qjdqUwS7+7UpSkluuPjEUsi0j8tW7+QDEQFeWLFQwaGD80nRTw8/3jXCM348zcZ4a573Mz9/v5eNyPOec61znnMzfc92fOdZ1zXYoIzMzMurNH0QGYmVntc7IwM7OynCzMzKwsJwszMyvLycLMzMpysjAzs7KcLMy6IOnnkqbkrDtE0p2SnpP0zUrHZlZtA4sOwKwIkk4HvtvJptcA50XEhRFxQg8OORV4Btg3IkLS+QARcX4P4wpgdESs6sl+ZpXmKwurSxFxTUS8tvQFnAP8AfheLw75BuCR8FOu1k85WZgBksYB3wZOi4inUtntkj6els+QdLek/5C0RdJjko5N264EpgBflPQnSR/ocOzBkn4qabOkjZLuktSjz56kQyQtlvRHSc9IukZSY9p2pqSbS+o+IenHJetrJL29V2+MWeJmKKt76Uv3RuCrEXF7N1XfmeoNBk4B5kkaFRFnSAJYGxFfSXV/VbLfF4C1QFNaPwro6RWIgIuAO4F9gZ8A55NdDd0BfCsloL8A9gTelX63g4HXAst6eD6znfjKwuqasm/5q4GHgW+Uqb4e+HZEbI2I64GVwF/nOM1W4CDgDWnfu3raXBURqyJiUUS8GBEbgEuA96Vtq4HngLcD7wUWAuskHZrq3BURL/fkfGYd+crC6t2XgMOAI3N8gbd2qPM7YGiOc8wiuwr4ZboCmRMRM3sSpKQhwKXAXwKvI/tDb1NJlTuAo4E3puXNZIniXWndbJf4ysLqlqSjgX8BTo2IzTl2aU5XIu1GAOvK7RQRz0XEFyLiYOBk4Nz2/o4e+HeypqvDI2Jf4O/JmqbatSeLv0zLd5Ali/fhZGF9wMnC6pKkg4DrgHMiYmnO3V4PfFbSIEl/B7wZuCXHuU6S9MaUaLYA24HumoX2lLR3yWsA2dXEn4AtkpqBaR32uQN4P9AQEWuBu4DjgQOBvL+fWZecLKxefQIYAlya7mAqfX2ni33uBUaTPU/xb2RXJH/Mca7RZB3efwJ+DVweEbd1U38F0FbyOhO4ADiCLNn8DJhXukNEPJ6Of1dafxZYDdwdEdtzxGjWLfm2cLPyJJ0BfDwi3lN0LGZF8JWFmZmV5WRhZmZluRnKzMzK8pWFmZmV1S8fyhs8eHCMHDmy6DDMzHYrDzzwwDMR0dTZtoolC0nDyYZRGEL2MNGciLhU0gHA9cBI4LfAByNiU7oH/VLgROAF4IyIeDAdawrQPubO1yLiqu7OPXLkSJYsWdL3v5SZWT8m6XddbatkM9Q24AsRMZZs4LSzJY0FpgO3RsRo4Na0DnAC2f3oo8nmBpgNkJLLeWSDuI0HzpO0fwXjNjOzDiqWLCLiqfYrg4h4DngUaAYmAe1XBlcBk9PyJODqyNwDNKanbCcCiyJiY0RsAhaRPZlqZmZVUpUObkkjgXFkT8AOaZ8vAHiarJkKskSypmS3tamsq3IzM6uSiicLSa8lG3v/nDQEwSvSCJ59cu+upKmSlkhasmHDhr44pJmZJRVNFpIGkSWKayKifSybP6TmpfbB3Nan8lZgeMnuw1JZV+U7iYg5EdESES1NTZ125puZWS9VLFmku5uuAB6NiEtKNi0gm4KS9POmkvKPKnMUsCU1Vy0EjpO0f+rYPi6VWR2Yv7SVCTMXM2r6z5gwczHzl77q7wQzq4JKPmcxAfgHYLmkh1LZl4GZwA2SziKbPOaDadstZLfNriK7dfZMgIjYKOmrwP2p3oURsbGCcVuNmL+0lRnzltO2NRs0tXVzGzPmLQdg8jh3W5lVU78c7qOlpSX8nMXub8LMxbRubntVeXNjA3dPP6aAiMz6N0kPRERLZ9s83IfVrHWdJIruys2scpwsrGYNbWzoUbmZVY6ThdWsaRPH0DBowE5lDYMGMG3imIIiMqtf/XIgQesf2juxZy1cybrNbQxtbGDaxDHu3DYrgJOF1bTJ45qdHMxqgJuhzMysLCcLMzMry8nCzMzKcrIwM7OynCzMzKwsJwszMyvLycLMzMpysjAzs7KcLMzMrCwnCzMzK8vJwszMynKyMDOzsio5B/dcSeslPVxS9nZJ90h6SNISSeNTuSRdJmmVpGWSjijZZ4qkJ9JrSmfnMjOrd5Wer76SVxZXAsd3KPsGcEFEvB3417QOcAIwOr2mArMBJB0AnAe8ExgPnCdp/wrGbGa222mfr751cxvBjvnq+zJhVCxZRMSdwMaOxcC+aXk/YF1angRcHZl7gEZJBwETgUURsTEiNgGLeHUCMjOra7MWrqRt6/adytq2bmfWwpV9do5qz2dxDrBQ0sVkierdqbwZWFNSb20q66r8VSRNJbsqYcSIEX0btZlZDavGfPXV7uD+JPD5iBgOfB64oq8OHBFzIqIlIlqampr66rBmZjWvGvPVVztZTAHmpeUfk/VDALQCw0vqDUtlXZVbnah0p51Zf1CN+eqrnSzWAe9Ly8cAT6TlBcBH011RRwFbIuIpYCFwnKT9U8f2canM6kA1Ou3M+oPJ45q56JTDaW5sQEBzYwMXnXJ4n05JXLE+C0nXAkcDgyWtJbur6RPApZIGAn8m9TEAtwAnAquAF4AzASJio6SvAvenehdGRMdOc+unuuu087zcZjur9Hz1FUsWEfHhLjYd2UndAM7u4jhzgbl9GJrtJqrRaWdm+fgJbqtZ1ei0M7N8nCysZlWj087M8qn2cxZmubW3v85auJJ1m9sY2tjAtIlj3F9hVgAnC6tple60M7N83AxlZmZlOVmYmVlZThZmZlaWk4WZmZXlZGFmZmU5WZiZWVlOFmZmVlbZ5ywkvQmYBryhtH5EHFPBuMzMrIbkeSjvx8B3gO8B28vUNTOzfihPstgWEbMrHomZmdWsPH0WN0v6lKSDJB3Q/qp4ZGZmVjPyXFlMST+nlZQFcHDfh2NmZrWo7JVFRIzq5FU2UUiaK2m9pIc7lH9G0mOSVkj6Rkn5DEmrJK2UNLGk/PhUtkrS9J7+gmZmtuvKJgtJ+0j6iqQ5aX20pJNyHPtK4PgOx3o/MAl4W0QcBlycyscCpwGHpX0ulzRA0gDgP4ETgLHAh1NdMzOrojx9Fj8AXgLendZbga+V2yki7gQ6zpf9SWBmRLyY6qxP5ZOA6yLixYh4kmwu7vHptSoiVkfES8B1qa6ZmVVRnmRxSER8A9gKEBEvAOrl+d4E/KWkeyXdIekdqbwZWFNSb20q66r8VSRNlbRE0pINGzb0MjwzM+tMnmTxkqQGsk5tJB0CvNjL8w0EDgCOIuswv0FSbxPPTiJiTkS0RERLU1NTXxzSzMySPHdDnQf8Ahgu6RpgAnBGL8+3FpgXEQHcJ+llYDBZ09bwknrDUhndlJuZWZXkuRtqEXAKWYK4FmiJiNt7eb75wPvhlWFE9gSeARYAp0naS9IoYDRwH3A/MFrSKEl7knWCL+jluc3MrJfy3A11YUT8MSJ+FhE/BTamK4xy+10L/BoYI2mtpLOAucDB6Xba64ApkVkB3AA8QnYVc3ZEbI+IbcCngYXAo8ANqa6ZmVWRshahbipIPwAej4iLJO1F9qW+NCLOr0J8vdLS0hJLliwpOgwzs92KpAcioqWzbXk6uD8GHC5pBnAzcHstJwozM+t7XXZwSzqiZPVS4LvA3cAdko6IiAcrHZyZmdWG7u6G+maH9U1kT1F/k+w2Ws9nYWZWJ7pMFhHx/moGYmZmtSvP3VD7Sbqk/eloSd+UtF81gjMzs9qQp4N7LvAc8MH0epZsvCgzM6sTeZ7gPiQi/rZk/QJJD1UqIDMzqz15rizaJL2nfUXSBKCtciGZmVmtyXNl8U/A1SX9FJvYMXuemZnVgTzJ4tmIeJukfQEi4tk0fpOZmdWJPM1QP4EsSUTEs6nsxsqFZGZmtaa7J7gPJZvmdD9Jp5Rs2hfYu9KBmZlZ7eiuGWoMcBLQCPxNSflzwCcqGZSZmdWW7p7gvgm4SdK7IuLXVYzJzMxqTJ7Jj5wozMzqXJ4ObjMzq3MVSxaS5kpan2bF67jtC5JC0uC0LkmXSVolaVnp8OiSpkh6Ir38fIeZWQHyDCQ4RNIVkn6e1semKVLLuRI4vpPjDQeOA35fUnwC2bzbo4GpwOxU9wDgPOCdwHjgPEn75zi3mZn1oTxXFleSzYE9NK0/DpxTbqeIuBPY2MmmbwFfJJsTo90k4Oo0H/c9QKOkg4CJwKKI2BgRm4BFdJKAzMyssvIki8ERcQPwMkBEbAO29+ZkkiYBrRHxmw6bmoE1JetrU1lX5Z0de2r7MOobNmzoTXhmZtaFPMnieUkHkq4EJB0FbOnpiSTtA3wZ+Nee7ptHRMyJiJaIaGlqaqrEKczM6laesaHOBRYAh0i6G2gCTu3FuQ4BRgG/kQQwDHhQ0nigFRheUndYKmsFju5Qfnsvzm1mZrugbLKIiAclvY/siW4BKyNia09PFBHLgde3r0v6LdASEc9IWgB8WtJ1ZJ3ZWyLiKUkLgX8v6dQ+DpjR03ObmdmuyXNlAdmdSCNT/SMkERFXd7eDpGvJrgoGS1oLnBcRV3RR/RbgRGAV8AJwJkBEbJT0VeD+VO/CiOis09zMzCqobLKQ9EOyJqSH2NGxHUC3ySIiPlxm+8iS5QDO7qLeXLKpXc3MrCB5rixagLHpC93MzOpQnruhHgb+otKBmJlZ7epuPoubyZqbXgc8Iuk+4MX27RFxcuXDMzOzWtBdM9TFVYvCzMxqWnfzWdwBIOnrEfGl0m2Svg7cUeHYzMysRuTps/irTspO6OtAzMysdnXXZ/FJ4FPAwZKWlWx6HXB3pQMzM7Pa0V2fxX8BPwcuAqaXlD/nB+PMzOpLd30WW8gGDOz24TozM+v/PK2qmZmV1WWykLRXNQMxM7Pa1d2Vxa/hlbGhzMysjnXXwb2npI8A75Z0SseNETGvcmGZmVkt6S5Z/BNwOtAI/E2HbQE4WZiZ1Ynu7ob6H+B/JC3pZh4KMzOrA3mGKP+hpM8C703rdwDf6c1seWZmtnvKc+vs5cCR6eflwBHA7HI7SZorab2kh0vKZkl6TNIySf8tqbFk2wxJqyStlDSxpPz4VLZK0vSO5zEzs8rLkyzeERFTImJxep0JvCPHflcCx3coWwS8JSLeCjxOmk9b0ljgNOCwtM/lkgZIGgD8J9lYVGOBD6e6ZmZWRXmSxXZJh7SvSDqYHdOrdiki7gQ2dij7ZURsS6v3AMPS8iTguoh4MSKeJJuLe3x6rYqI1RHxEnBdqmtmZlWUp89iGnCbpNWAgDcAZ/bBuT8GXJ+Wm8mSR7u1qQxgTYfyd3Z2MElTgakAI0aM6IPwzMysXdlkERG3ShoNjElFKyPixe72KUfSvwDbgGt25TilImIOMAegpaXF84WbmfWhPFcWpOSwrGzFHCSdAZwEHBsR7V/qrcDwkmrDUhndlJuZWZVUdSBBSccDXwROjogXSjYtAE6TtJekUcBo4D7gfmC0pFGS9iTrBF9QzZjNzCznlUVvSLoWOBoYLGktcB7Z3U97AYskAdwTEf8UESsk3QA8QtY8dXZEbE/H+TSwEBgAzI2IFZWK2czMOqcdLUFdVJDmAVcAP4+Il6sS1S5qaWmJJUuWFB2GmdluRdIDEdHS2ba8D+V9BHhC0kxJY8rtYGZm/UvZZBERv4qI08me3P4t8CtJ/yvpTEmDKh2gmZkVL1cHt6QDgTOAjwNLgUvJkseiikVmZmY1o2wHt6T/JnvG4ofA30TEU2nT9ZLcMWBmVgfy3A11WUTc1tmGrjpCzMysf8nTDDW2w+iw+0v6VAVjMjOzGpMnWXwiIja3r0TEJuATlQvJzMxqTZ5mqAGS1D40Rxo2fM/KhlWM+UtbmbVwJes2tzG0sYFpE8cweVxz+R3NzPq5PMniF2Sd2d9N6/+YyvqV+UtbmTFvOW1bs9HXWze3MWPecgAnDDOre3maob4E3AZ8Mr1uJRvfqV+ZtXDlK4miXdvW7cxauLKgiMzMakeeIcpfJptGtexUqruzdZvbelRuZlZPyl5ZSJogaZGkxyWtlvRkmgipXxna2NCjcjOzepKnGeoK4BLgPWRzb7eQbw7u3cq0iWNoGDRgp7KGQQOYNtFDYZmZ5eng3hIRP694JAVr78T23VBmZq+WJ1ncJmkWMA94ZTrViHiwYlEVZPK4ZicHM7NO5EkW70w/S4f2COCYvg/HzMxqUZ4hyt/fyatsopA0V9J6SQ+XlB2QOsufSD/3T+WSdJmkVZKWSTqiZJ8pqf4Tkqb09hc1M7Pey3M31BBJV0j6eVofK+msHMe+Eji+Q9l04NaIGE32vMb0VH4C2bzbo4GppNt0JR1ANh3rO4HxwHntCcbMzKonz91QV5LNgT00rT8OnFNup4i4E9jYoXgScFVavgqYXFJ+dWTuARolHQRMBBZFxMY0JtUiXp2AzMyswvIki8ERcQPwMkBEbAO2d79Ll4aUzIfxNDAkLTcDa0rqrU1lXZW/iqSpkpZIWrJhw4ZehmdmZp3JkyyeTzPltQ8keBSwZVdPnAYmjF09Tsnx5kRES0S0NDU19dVhzcyMfMniXGABcIiku4Grgc/08nx/SM1LpJ/rU3krMLyk3rBU1lW5mZlVUZ67oR4E3ge8m2zE2cMiYlkvz7cAaL+jaQpwU0n5R9NdUUeRPQj4FFlfyXFpwqX9geNSmZmZVVGeObg/2qHoCElExNVl9rsWOBoYLGkt2V1NM4Eb0t1UvwM+mKrfApwIrAJeAM4EiIiNkr4K3J/qXRgRHTvNzcyswvI8lFc6DtTewLHAg2TNUV2KiA93senYTuoGcHYXx5kLzM0Rp5mZVUieIcp36p9I83FfV7GIzMys5uTp4O7oeWBUXwdiZma1K0+fxc3suMV1D2AscEMlgzIzs9qSp8/i4pLlbcDvImJtheIxM7MalKfP4o5qBGJmZrUrTzPUc3T+pLXIbmTat8+jMjOzmpKnGerbwFPAD8kSxOnAQRHxr5UMzMzMakeeu6FOjojLI+K5iHg2ImaTjRJrZmZ1Iu9AgqdLGiBpD0mnk90+a2ZmdSJPsvgI2bAcf0ivv0tlZmZWJ/LcDfVb3OxkZlbX8kyr+iZJt7bPpS3prZK+UvnQzMysVuRphvoeMAPYCpCGJz+tkkGZmVltyZMs9omI+zqUbatEMGZmVpvyJItnJB3CjmlVTyV77sLMzOpEnofyzgbmAIdKagWeJHswz8zM6kS3yULSHkBLRHxA0muAPSLiueqEZmZmtaLbZqiIeBn4Ylp+vq8ShaTPS1oh6WFJ10raW9IoSfdKWiXpekl7prp7pfVVafvIvojBzMzyy9Nn8StJ/yxpuKQD2l+9PaGkZuCzZFcsbwEGkN1d9XXgWxHxRmATcFba5SxgUyr/VqpnZmZVlCdZfIis3+JO4IH0WrKL5x0INEgaCOxD1mF+DHBj2n4VMDktT0rrpO3HStIunt/MzHogzxPcfTqFakS0SroY+D3QBvySLAFtjoj2W3LXAs1puRlYk/bdJmkLcCDwTOlxJU0FpgKMGDGiL0M2M6t7XV5ZSPr3kuW/6qsTStqf7GphFDAUeA1w/K4eNyLmRERLRLQ0NTXt6uHMzKxEd81QpV/gfdlP8AHgyYjYEBFbgXnABKAxNUsBDANa03IrMBwgbd8P+GMfxmNmZmXk6bPoa78HjpK0T+p7OBZ4BLgNODXVmQLclJYXpHXS9sUR0dnMfWZmViHd9Vm8XtK5ZLPjtS+/IiIu6c0JI+JeSTcCD5ING7KU7KG/nwHXSfpaKrsi7XIF8ENJq4CNeFwqM7Oq6y5ZfA94XSfLuywizgPO61C8GhjfSd0/k82hYWZmBekyWUTEBdUMxMzMalcRfRZmZrabcbIwM7OynCzMzKysPEOUv0LSTyPipEoFY7Vj/tJWZi1cybrNbQxtbGDaxDFMHtdcfkcz65d6lCzYMQSH9WPzl7YyY95y2rZuB6B1cxsz5i0HcMIwq1M9bYZaWpEorKbMWrjylUTRrm3rdmYtXFlQRGZWtB4li4j4WKUCsdqxbnNbj8rNrP9zB7e9ytDGhh6Vm1n/52RhrzJt4hgaBg3Yqaxh0ACmTRxTUERmVrTuhij/Yfr5ueqFY7Vg8rhmLjrlcJobGxDQ3NjARacc7s5tszrW3d1QR0oaCnxM0tVkAwq+IiI2VjQyK9Tkcc1ODmb2iu6SxXeAW4GDyWayK00WkcrNzKwOdNkMFRGXRcSbgbkRcXBEjCp5OVGYmdWRsh3cEfHJagRiZma1y3dDmZlZWYUkC0mNkm6U9JikRyW9S9IBkhZJeiL93D/VlaTLJK2StEzSEUXEbGZWz4q6srgU+EVEHAq8DXgUmA7cGhGjyTrWp6e6JwCj02sqMLv64ZqZ1beqJwtJ+wHvJc2xHREvRcRmYBJwVap2FTA5LU8Cro7MPUCjpIOqHLaZWV0r4spiFLAB+IGkpZK+L+k1wJCIeCrVeRoYkpabgTUl+6+lk9FvJU2VtETSkg0bNlQwfDOz+lNEshgIHAHMjohxwPPsaHICICKC7FmO3CJiTkS0RERLU1NTnwVrBtmw7RNmLmbU9J8xYeZi5i9tLToks6oqIlmsBdZGxL1p/Uay5PGH9ual9HN92t4KDC/Zf1gqM6uK+UtbmXbjb2jd3EaQze8x7cbfOGFYXal6soiIp4E1ktpHpTsWeARYAExJZVOAm9LyAuCj6a6oo4AtJc1VZhV3wc0r2Lp95wvdrduDC25eUVBEZtXX05ny+spngGsk7QmsBs4kS1w3SDoL+B3wwVT3FuBEYBXwQqprVjWbXtjao3Kz/qiQZBERDwEtnWw6tpO6AZxd8aDMzKxLfoLbrIzGhkE9Kjfrj5wszMo4/+TDGLTHTiP0M2gPcf7JhxUUkVn1FdVnYbbbaJ/XY9bClazb3MbQxgamTRzj+T6srjhZmOXgyaCs3rkZyszMynKyMDOzspwszMysLCcLMzMry8nCzMzKcrIwM7OyfOus1bT5S1v9fINZDXCysJo1f2krM+Ytp23rdiAbGnzGvOUAThhmVeZmKKtZsxaufCVRtGvbup1ZC1cWFJFZ/XKysJq1bnNbj8rNrHKcLKxmDW1s6FG5mVWOk4XVrGkTx9AwaMBOZQ2DBjBt4pgu9jCzSiksWUgaIGmppJ+m9VGS7pW0StL1aRY9JO2V1lel7SOLitmqa/K4Zi465XCaGxsQ0NzYwEWnHO7ObbMCFHk31OeAR4F90/rXgW9FxHWSvgOcBcxOPzdFxBslnZbqfaiIgK36PNqrWW0o5MpC0jDgr4Hvp3UBxwA3pipXAZPT8qS0Ttp+bKpvdWD+0lYmzFzMqOk/Y8LMxcxf2lp0SGY1qdKflaKuLL4NfBF4XVo/ENgcEdvS+lqg/c/JZmANQERsk7Ql1X+m9ICSpgJTAUaMGFHR4K06/JyFWT7V+KxU/cpC0knA+oh4oC+PGxFzIqIlIlqampr68tBWED9nYZZPNT4rRVxZTABOlnQisDdZn8WlQKOkgenqYhjQfg3VCgwH1koaCOwH/LH6YVu1+TkLs3yq8Vmp+pVFRMyIiGERMRI4DVgcEacDtwGnpmpTgJvS8oK0Ttq+OCKiiiFbQfychVk+1fis1NJzFl8CzpW0iqxP4opUfgVwYCo/F5heUHxWZX7OwiyfanxWCh1IMCJuB25Py6uB8Z3U+TPwd1UNzGpCe8ecR5016141Pivqjy06LS0tsWTJkqLDMDPbrUh6ICJaOttWS81QZmZWo5wszMysLCcLMzMry8nCzMzKcrIwM7Oy+uXdUJI2AL/bhUMMpsPYU3XM78XO/H7s4PdiZ/3h/XhDRHQ6XlK/TBa7StKSrm4fqzd+L3bm92MHvxc76+/vh5uhzMysLCcLMzMry8mic3OKDqCG+L3Ymd+PHfxe7Kxfvx/uszAzs7J8ZWFmZmU5WZiZWVlOFiUkHS9ppaRVkup63gxJwyXdJukRSSskfa7omIomaYCkpZJ+WnQsRZPUKOlGSY9JelTSu4qOqSiSPp8+Iw9LulbS3kXHVAlOFomkAcB/AicAY4EPSxpbbFSF2gZ8ISLGAkcBZ9f5+wHwOeDRooOoEZcCv4iIQ4G3Uafvi6Rm4LNAS0S8BRhANgNov+NkscN4YFVErI6Il4DrgEkFx1SYiHgqIh5My8+RfRnU7axDkoYBfw18v+hYiiZpP+C9pNksI+KliNhcbFSFGgg0SBoI7AOsKzieinCy2KEZWFOyvpY6/nIsJWkkMA64t9hICvVt4IvAy0UHUgNGARuAH6Rmue9Lek3RQRUhIlqBi4HfA08BWyLil8VGVRlOFtYtSa8FfgKcExHPFh1PESSdBKyPiAeKjqVGDASOAGZHxDjgeaAu+/gk7U/WAjEKGAq8RtLfFxtVZThZ7NAKDC9ZH5bK6pakQWSJ4pqImFd0PAWaAJws6bdkzZPHSPpRsSEVai2wNiLarzRvJEse9egDwJMRsSEitgLzgHcXHFNFOFnscD8wWtIoSXuSdVItKDimwkgSWZv0oxFxSdHxFCkiZkTEsIgYSfb/YnFE9Mu/HvOIiKeBNZLGpKJjgUcKDKlIvweOkrRP+swcSz/t7B9YdAC1IiK2Sfo0sJDsjoa5EbGi4LCKNAH4B2C5pIdS2Zcj4pYCY7La8RngmvSH1WrgzILjKURE3CvpRuBBsjsIl9JPh/3wcB9mZlaWm6HMzKwsJwszMyvLycLMzMpysjAzs7KcLMzMrCwnC+s3JJ0v6QVJry8p+1MfHXukpIf74lhlzrOXpF9JekjShyp9vjKxnFzvoy/bDn7OwvqbZ4AvAF8qOpBSkgZGxLYcVccBRMTbKxxSWRGxgDp+MNV25isLK0z6a/0xSVdKelzSNZI+IOluSU9IGt+Lw84FPiTpgAlqnaYAAAOrSURBVE7O9XDJ+j9LOj8t3y7pW5KWpLkZ3iFpXorhayWHGZhifDTN5bBP2v9ISXdIekDSQkkHlRz325KWkA1vXhrPAZLmS1om6R5Jb01XRD8C3pGuLA7psE+uOCX9vaT70jG+m4bfR9LstO8KSReU1P+tpAskPShpuaRDU/kZkv4jLV8p6TJJ/ytptaRTU/keki5P/46LJN1Ssm2msvlQlkm6uBf/llZDnCysaG8Evgkcml4fAd4D/DPw5V4c709kCaOnkzW9FBEtwHeAm4CzgbcAZ0g6MNUZA1weEW8GngU+lcbP+r/AqRFxZDr3v5Ucd8+IaImIb3Y43wXA0oh4K9nveXVErAc+DtwVEW+PiP/X0zglvRn4EDAhXZ1sB05P+/5L2vetwPskvbXkuM9ExBHAbLL3vjMHkf3bnATMTGWnACPJ5oD5B+BdAOk9+z/AYel3/FrHg9nuxc1QVrQnI2I5gKQVwK0REZKWk30J9cZlwEM9/Gu2vbllObAiIp5KMa0mG2ByM7AmIu5O9X5ENunNL8i+rBdlQwMxgGyo6nbXd3G+9wB/CxARi9MX/b59EOd7gCOB+1M8DcD6tM8HJU0l+9wfRPYFvyxtax8o8gGyBNCZ+RHxMvCIpCElv8ePU/nTkm5L5VuAPwNXKJtZsO5nF9zdOVlY0V4sWX65ZP1lOvn/KekHZO366yLixM4OGBGbJf0X2V/d7bax85V0x6kvS8/bMab2ODqOjROAyL60u5pW9PkuynurXJwCroqIGaU7SRpFdsXwjojYJOlKdn4P2o+1na6/F0rPp+6CTGOtjScbWO9U4NPAMd3tY7XNzVC2W4mIM1MTTaeJosQlwD+y44vvD8Dr01/we5E1pfTUCO2Ya/ojwP8AK4Gm9nJJgyQdluNYd5GahyQdTdYM1BfzhdwKnNp+R1jqG3kDsC9Z4tqSrgpO6INzAdwN/G3quxgCHJ3O+1pgvzTw5OfJpl613ZivLKxfiohnJP032RcVEbFV0oXAfWTzlDzWi8OuJJuLfC7ZkNyzI+Kl1KF7mbLpRgeSzapXbsTi84G5kpYBLwBTehHPq0TEI5K+AvxS0h7AVuDsiLhH0lKy33sN2Zd8X/gJO4YoX0M2+uoW4HXATZL2JrsKObePzmcF8aizZrZLJL02Iv6UOrXvI+tcf7rouKxv+crCzHbVTyU1AnsCX3Wi6J98ZWFmZmW5g9vMzMpysjAzs7KcLMzMrCwnCzMzK8vJwszMyvr/waupGHGKTMkAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"meanings = {'to':4,'the':4 ,'raha':2, 'diya':3, 'my':2, 'for':3, 'com':3, 'do':2, 'india':1,'time':1}\n",
"# m = [meanings[lst[i][0]] for i in range(10)]\n",
"m = [num_meanings(lst[i][0]) for i in range(10)]\n",
"f = [lst[i][1] for i in range(10)] \n",
"plt.scatter(m, f)\n",
"plt.xlabel('m - Number of meanings')\n",
"plt.ylabel('f - Frequency of the token')\n",
"plt.title('Zipf\\'s Law')\n",
"m\n",
"# lst"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 3. Heaps' Law"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"vsize = 0\n",
"num_tokens = 0\n",
"unique_tokens = []\n",
"V = []\n",
"N = []\n",
"\n",
"for i in range(len(tokens)):\n",
" s = tokens[i]\n",
" if s not in unique_tokens:\n",
" unique_tokens.append(s)\n",
" vsize += 1\n",
" V.append(vsize)\n",
" N.append(i+1)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"N = np.array(N)\n",
"V = np.array(V)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0.5, 1.0, \"Heaps' Law\")"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZIAAAEWCAYAAABMoxE0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3dd3zV1f3H8deHjWxkGPZUQBliBNyoLSJqsdYqOOvCOqq2tRVbq3b82tq6994D92hFERVQVEYYyoawiUDYhE2Sz++P74leaSCXJDc3N3k/H4/7uN97vuvz5YZ88j3nfM8xd0dERKS4qiQ7ABERSW1KJCIiUiJKJCIiUiJKJCIiUiJKJCIiUiJKJCIiUiJKJCIiUiJKJCKAmS0xsx/tUfYLMxufrJhCDHt90KuwmEWSQYlERERKRIlEJE5m1sLM3jSzNWa22Myui1nXx8y+MrONZrbSzB40sxox693MrjOzRWa21sz+bWZVwrpOZjbOzDaFda+WMM5GZvbfEOeGsNwqrDvRzGbEbDvazCbHfP7czM4syfml8lEiEYlD+KX/H+BroCVwMnCDmZ0SNskDfg00AY4K66/e4zA/BdKB3sBg4NJQ/lfgI6AR0Ap4oGAHd7dihFsFeAZoC7QBtgMPhnUTgM5m1sTMqgM9gBZmVs/Maof4Pi/GOaUSq5bsAETKkXfMLDfmcw1galg+Emjq7n8JnxeZ2RPAEGCUu0+J2W+JmT0GnADcG1N+h7uvB9ab2b3AUOBJYDfRL/0W7r4CKFG7jLuvA94s+Gxm/weMCeu2hzuQ44FviRLjRuAYYCewIOwvEjfdkYh870x3b1jw4od3FG2J/nLfWPAC/gA0BzCzg0MV0ioz2wz8nejuJNbymOWlQIuw/HvAgElmNsvMLqUEzOwAM3vMzJaGWD4DGppZ1bDJOKA/UTIZB4wlSnonhM8i+0WJRCQ+y4HFsYnG3eu5+6Cw/hFgLtDZ3esTJZk9q6Vaxyy3IbojwN1XufsV7t4CuBJ42Mw6lSDW3wKHAH1DLMeH8oJ49kwk41AikRJQIhGJzyQgx8xuMrPaZlbVzA4zsyPD+nrAZmCLmXUBrirkGL8LDeGtgeuBVwHM7OcFjeHABsCB/Djjqm5mtWJe1UIs24GNZtYYuG2Pfb4kSjR9gEnuPovojqsv0d2LyH5RIhGJg7vnAacDvYDFwFqi9o0GYZMbgfOAHOAJQpLYw7vAFGA68D7wVCg/EphoZluA94Dr3X1RnKGNJEoaBa/bidplaocYJwAf7nEtW4nafma5+65Q/BWw1N2z4zyvyHdME1uJJF54sLCzu2cmOxaR0qY7EhERKRElEhERKRFVbYmISInojkREREqk0j3Z3qRJE2/Xrl2ywxARSSlTpkxZ6+5NC1tX6RJJu3btyMjISHYYIiIpxcyW7m2dqrZERKRElEhERKRElEhERKRElEhERKRElEhERKRElEhERKRElEhERKRElEhERCq4FRu28cRni/gic21Cjl/pHkgUEakMMrO3MHr2aj6YuZJvVmwC4Kr+HTmm054zQJecEomISAWxKzefUbNW8cKEpUxavB6A7i0bcNPALpzWPY02Bx6QkPMqkYiIpLDcvHy+WLiO97/5lv9+s5Jtu/Jo0/gAbj61Cz/p1YK0BrUTHoMSiYhICpr97WZenLiUj2atYu2WXdSuXpUBhzZncK8W9D+4GVWqWJnFokQiIpIisnN28OKEZbz/zbcsXLOVmtWq8ONuzTm9Rxr9D2lGrepVkxKXEomISDm2Oy+fMXOzeWXSMsZnrmV3nnNc5yac37ctZ/VuScMDaiQ7RCUSEZHyKGvjdt6asoIRk5eTtXE7zerV5NJj2nPuka3p0LRussP7ASUSEZFywt35bMFanv9yCWPnryEv3+nTvjG3ntGNk7s0o1rV8vnonxKJiEiS7czN4z9fr+ThsZksWrOV5vVrctmx7TmvTxvaNamT7PCKpEQiIpIkC9ds4dXJy3ktYzkbt+2mW1p9/v7T7vzsiJbUrJachvPiUCIRESlDO3Pz+GRONo+MXciMrE1UrWIM6Nacc49szXGdm1K1DLvtlhYlEhGRMjBn5WaeGr+YD2euYsvOXFo0qMUtp3XlJz1b0Kx+rWSHVyJKJCIiCbJp+25GzVzFG1NWMGnJeurUqMqp3dMY1P0gju/ctNw2nu+vhCYSM6sFfAbUDOd6w91vM7P2wAjgQGAKcKG77zKzmsDzwBHAOuBcd18SjnUzcBmQB1zn7qNC+UDgPqAq8KS7/zOR1yQiUpTM7Bzu+XgBH85cRV6+0/bAAxh+aheGHNm6XDz3UdoSfUeyEzjJ3beYWXVgvJl9APwGuMfdR5jZo0QJ4pHwvsHdO5nZEOAO4Fwz6wYMAQ4FWgAfm9nB4RwPAT8GVgCTzew9d5+d4OsSEfmB3Lx8Pp6TzbNfLmbCouju49Jj2nFajxb0bNUAs9Rr+4hXQhOJuzuwJXysHl4OnAScF8qfA24nSiSDwzLAG8CDFv3rDwZGuPtOYLGZZQJ9wnaZ7r4IwMxGhG2VSESkTKzbspMRk5fzzBdLWLtlJwfVr8WNAw5maJ82HFi3ZrLDKxMJbyMxs6pE1VediO4eFgIb3T03bLICaBmWWwLLAdw918w2EVV/tQQmxBw2dp/le5T3TcBliIh8Jy/f+XjOal7PWMFn89ewKy+fYzs14YJ+h/GjruX3wcFESXgicfc8oJeZNQTeBrok+px7MrNhwDCANm3alPXpRaSCWLlpO29NzeKlCUv5dtMOmtaryYVHtWXIka3p3LxessNLmjLrteXuG81sDHAU0NDMqoW7klZAVtgsC2gNrDCzakADokb3gvICsfvsrTz23I8DjwOkp6d7qV2UiFR423flMW7+Gt6cuoJP5qwm3/l+2JKuzaleye4+CpPoXltNgd0hidQmahS/AxgDnE3Uc+ti4N2wy3vh81dh/afu7mb2HvCymd1N1NjeGZgEGNA59ALLImqQL2h7EREptuXrt/Hk54t4c2oWW3bmcmCdGlxxfAfOTS9/gyYmW6LvSNKA50I7SRXgNXf/r5nNBkaY2d+AacBTYfungBdCY/p6osSAu88ys9eIGtFzgWtClRlmdi0wiqj779PuPivB1yQiFZS780XmOp4cv4ix89ZgBmf0aME56a3p26Gx7j72wqKOVZVHenq6Z2RkJDsMESlHNm3bzetTlvPypGUsWrOVA+vU4Px+bTknvRWtGiVmnvNUY2ZT3D29sHV6sl1EKq3l67fxxOeLeHtqFjk7c+ndpiH/OrsHg3u1SKlBE5NNiUREKpUdu/P47zcreWPKciYsWk/1qsag7mlcfmwHurdqkOzwUpISiYhUCvNW5fDKpGW8Oz2LDdt207pxbW4ccDA/7d2Klg1rJzu8lKZEIiIV1sZtu3hrahZvTl3BrG83U72qMeDQgzi/TxuO6nhghR62pCwpkYhIhZKzYzdj563hlUnLmLBoHfkO3dLqc+vp3Rjcq0WlGbakLCmRiEiFMH91Dg9+msnIGSvJzXdaNqzN1f07MfCwgzispdo+EkmJRERSVn4Y82rE5OWMmZdNnRrVuKBfWwYedhBHtmuckrMNpiIlEhFJObvz8nn/m5U8OCaTzOwtNK1XkyuP78jlx7WniaquypwSiYikjGnLNvD2tCzenpZFzo5cDm5el/uHHs6gww6qdCPulidKJCJS7n2zYiP/HjWPzxespWa1Kgw49CAG92zBSV2aUUXVV0mnRCIi5dbcVZt5eMxC3vv6WxoeUJ0bBxzML45pT92a+tVVnujbEJFyZ9Li9Tzx+SJGz15NrepVuKp/R67u35F6taonOzQphBKJiJQL+fnOmHnZPDpuIZOXbKDhAdW59sROXHZsexrVqZHs8GQflEhEJKl27M7jnWlZvDRxGTOyNpHWoBa3nt6NoX3aULuGBk5MBUokIpIUm7bv5sUJS3nuyyVk5+ykfZM6/OvsHvykZwtqVVcCSSVKJCJSpjZs3cULE5byzBeL2bBtN8d1bsLd5/TimE4a+ypVKZGISJlYvn4b94yez3+/WcmuvHyO7dSEmwZ20dDtFYASiYgk1LxVOTw2biHvfv0t1asa5xzZigv7teOQg+olOzQpJUokIpIQo2ev5qExmUxfvpE6NapyYb+2XH5ce01dWwEpkYhIqZq8ZD33fbyA8Zlr6dC0Dn8Y1IWf9W6l4dsrMCUSESmx/HxnfOZaHvtsIV9krqNJ3ZrcfGoXLjmmPTWqaQysii6uRGJmvwJedPcNCY5HRFKIuzNyxioeGpPJ7JWbaVK3Jn8Y1IXz+7aljoYxqTTi/VOhOTDZzF4zs4EWRx89M2ttZmPMbLaZzTKz60P57WaWZWbTw2tQzD43m1mmmc0zs1NiygeGskwzGx5T3t7MJobyV81Mj7+KlIHcvHxGzVrF6Q+M55qXp5Kzczf/PrsHXww/kWHHd1QSqWTM3ePbMEoeA4BLgHTgNeApd1+4l+3TgDR3n2pm9YApwJnAOcAWd79zj+27Aa8AfYAWwMfAwWH1fODHwApgMjDU3Web2WvAW+4+wsweBb5290f2dR3p6emekZER1zWLyA+5O6Nnr+av789m+frttGl8AL86qRNn9W6lSaQqODOb4u7pha2L+88Gd3czWwWsAnKBRsAbZjba3X9fyPYrgZVhOcfM5gAt93GKwcAId98JLDazTKKkApDp7ovCxYwABofjnQScF7Z5Drgd2GciEZH9l5cfJZCnv1jMpMXr6dCkDg+d15sfd2uuNhCJu43keuAiYC3wJPA7d99tZlWABcD/JJI99m8HHA5MBI4BrjWzi4AM4Leh7aUlMCFmtxV8n3iW71HeFzgQ2OjuuYVsv+f5hwHDANq0aVP0BYsIADtz83hzShbPfrmY+au30Lx+TW4/oxtD+rTRMCbynXjvSBoBZ7n70thCd883s9P3taOZ1QXeBG5w981m9gjwV8DD+13Apfsd+X5w98eBxyGq2krkuUQqgm27cnl18nIeHruQNTk76ZpWn7vP6ckZPVtQXTMRyh6KTCRmVhUY4u63F7be3efsY9/qREnkJXd/K2y/Omb9E8B/w8csoHXM7q1CGXspXwc0NLNq4a4kdnsRKYZtu3J55oslPDpuITk7cunbvjF3n9OTYzs10ThYsldFJhJ3zws9ptq4+7J4Dxwa558C5rj73THlaaH9BOCnwMyw/B7wspndTdTY3hmYBBjQ2czaEyWKIcB5oc1mDHA2MAK4GHg33vhE5HuL1mzhsXGLeH/GSrbszOVHXZtz5QkdSG/bSAlEirQ/VVuzzGwSsLWg0N1/so99jgEuBGaY2fRQ9gdgqJn1IqraWgJcGY41K/TCmk3UmH+Nu+cBmNm1wCigKvC0u88Kx7sJGGFmfwOmESUuEYnT4rVbuf+TBbw9LYsa1arwk54tGHJka9LbNU52aJJC4ur+a2YnFFbu7uNKPaIEU/dfEcjauJ2Hx2QyYvJyqlc1LjqqHZcf155m9WolOzQpp0rc/TcVE4aI/K/szTt4ZNxCXpq4jPx8Z8iRrbn+5M40q68EIsUXb/fffsADQFegBlEV01Z3r5/A2ESklCxeu5UXvlrKaxnL2bYrl58e3opf/7izRuKVUhFvG8mDRI3crxM91X4R3z91LiLlVGb2Fh4em8k707IwMwZ1T+P6kzvRqZnmApHSsz9PtmeaWdXQAP6MmU0Dbk5caCJSXNk5O3hq/GKe/HwxBlxyTHuuPKGD2kAkIeJNJNvCgIjTzexfREOf6KkkkXJm3qocnh6/mHemZ7E7L58zerbgT6d3o4nmApEEijeRXEjULnIt8GuiBwR/lqigRCR+7s64+Wt4/LNFfLlwHbWqV+HMXi254vgOdGpWN9nhSSUQb6+tgqFRtgN/Tlw4IhKv/Hznk7nZPDI2k6nLNpLWoBY3DjiY8/u2pVEdzaggZWeficTMZhA9OFgod+9R6hGJyD7l5zsfzlrFPaPnsyB7C60a1eYvgw9laJ82GgdLkqKoO5J9DsgoImUnNy+fd6Z/y5OfL2Luqhw6NKnDXT/vyeBeLaimBCJJtM9EsudovyKSHJ/OXc0/P5jL/NVb6HJQPf51dg/OOrylEoiUC/E+kJjD91VcNYDq6IFEkYT7bP4a7v9kARlLN9C+SR3uG9KLn/RsoYEUpVyJt7H9u6eXwqi+g4F+iQpKpLLLWLKeez6ezxeZ62jRoBa3nt6NC/q11WyEUi7F/UBiAY9GeXzHzG4Dhpd+SCKVU8F86E9+vphJS9ZzYJ0a3HJaVy48qi01q2k2Qim/4q3aOivmYxWiYVJ2JCQikUpoxopN/OW/s5i8ZANpDWrxx0FdOb9fGw6osd9/64mUuXh/Ss+IWc4lmkdkcKlHI1LJLF23lQc+zeTNqSs4sE4N/nrmYQw9srUa0SWlxNtGckmiAxGpTL5evpGHx2by0ezVVDXj8mPbc+2JnWlwQPVkhyay3+Kt2uoA3EfUwO7AV8Cv3X1RAmMTqXCyc3bwz5FzeWtaFg0PqM7V/TtyQb+2pDWonezQRIot3qqtl4GHiOZYh2hI+VeAvokISqSi2b4rj6fGL+KRsQvZnef88oSOXH1iR+rX0h2IpL54E8kB7v5CzOcXzex3iQhIpCLZuG0Xz3yxhJcmLmXtll2c1KUZfxjUVYMpSoVS1FhbjcPiB2Y2HBhBVLV1LjAywbGJpKzsnB28MnE5T41fxOYduZzUpRnDju9Avw4HJjs0kVJX1B3JFKLEUfAY7ZUx6xxNbCXyA1t25vLsF4t5ZOxCtu7K4+QuzfjNgIM5tEWDZIcmkjBFjbXVviQHN7PWwPNAc6LE87i73xfudF4F2hF1JT7H3TeEp+bvAwYB24BfuPvUcKyLgVvCof/m7s+F8iOAZ4HaRHdJ14eHJkXKTG5ePi9NXMa9H89nw7bdnNSlGTef2oXOzTWlrVR8cT/tZGaHAd2A7+bqdPfni9gtF/itu081s3rAFDMbDfwC+MTd/xmqzIYDNwGnAp3Dqy/wCNA3JJ7biB6E9HCc99x9Q9jmCmAiUSIZCHwQ73WJlNS0ZRv407szmZm1mX4dGjP81K70at0w2WGJlJl4u//eBvQnSiQjiX7hjye629grd19JNC0v7p5jZnOAlkQPM/YPmz0HjCVKJIOB58MdxQQza2hmaWHb0e6+PsQzGhhoZmOB+u4+IZQ/D5yJEomUgayN2/m/92czcsYqmtStyQNDD+f0HmkaUFEqnXjvSM4GegLT3P0SM2sOvLg/JzKzdsDhRHcOzUOSAVhFVPUFUZJZHrPbilC2r/IVhZTvee5hwDCANm3a7E/YIv8jNy+ft6Zm8fcP5rBjdx7XntiJX/bvSN2aGs5EKqd4f/K3u3u+meWaWX0gm2je9riYWV3gTeAGd98c+xebu7uZJbRNw90fBx4HSE9PV/uJFNvSdVu5+qWpzPp2M4e3aci/z+5Bp2ZqB5HKLd5EkmFmDYEniHpybSF6ur1IZladKIm85O5vheLVZpbm7itD1VV2KM/ihwmqVSjL4vuqsILysaG8VSHbi5Sq3Lx8nv1yCf8aNY/a1aty35BenNGjBVWqqBpLJN6xtq4Oi4+a2YdE7RLfFLVf6IX1FDDH3e+OWfUecDHwz/D+bkz5tWY2gqixfVNINqOAv5tZo7DdAOBmd19vZpvNrB9RldlFwAPxXJNIvCYsWsef/zObOSs3c1znJvzjrO60anRAssMSKTfibWz/KfCpu29y9yWhEfxMd3+niF2PAS4EZpjZ9FD2B6IE8pqZXQYsBc4J60YSdf3NJOr+ewlASBh/BSaH7f5S0PAOXM333X8/QA3tUkqWrtvKPaPn8870b0lrUEuN6SJ7YfE8cmFm09291x5l09z98IRFliDp6emekZGR7DCkHNu2K5d7P17AM18sxjAuO64915/cmVrVNbmUVF5mNsXd0wtbF28bSWGTI6iLilQ4Y+Zm8+f/zGLJum2cfUQrbhxwCAc1qFX0jiKV2P40tt9NNAIwwDVEje4iFcLqzTu4Z/R8RkxeTqtGtXn58r4c3alJssMSSQnxJpJfAX8iGtYEYDRRMhFJaXn5zhOfL+Ke0fPJzXeGHd+B351yCNU1Q6FI3OLttbUVGB6GOXF335LYsEQS76uF6/jHB3P4ZsUmftS1GX86vRttD6yT7LBEUk68vba6Ew2H0jh8Xgtc7O4zExibSEKs27KTez6ez4sTltGyYW3u/HlPfta7pXpjiRRTvFVbjwG/cfcxAGbWn+hJ8aMTFJdIqdu2K5cnPlvMU+MXkbMzl0uOacfvT+lC7RrqjSVSEvEmkjoFSQTA3ceameoAJCW4O58vWMst78xk2fpt/Khrc24aeIiGeBcpJfEmkkVm9iegYLrdC4BFiQlJpPTMW5XD396fzecL1tKm8QG8fEVfju6o3lgipSneRHIp8GegYKysz0KZSLmUnbODf304jzenrqB+reoMP7ULlxzTjprVVI0lUtriTSTt3f26hEYiUgrcnXemZ3Hru7PYuTufy49tz1X9O9G4To1khyZSYcWbSO4ys4OAN4BX1VtLyqNVm3bwuze+5vMFa+ndpiH/OrsnnZrVTXZYIhVevM+RnBgSyTnAY2FOklfd/W8JjU4kDrvz8nnhq6Xc+/F8dubmc8tpXbnkmPZU1RDvImUi7vGy3H0VcL+ZjQF+D9wKKJFIUs3+djM3vv41s1dG86X//afd6dBUdyEiZSneBxK7AucCPwPWEQ2V8tsExiWyTzk7dnPnqHm8NHEZ9WtX59ELejPwsLRkhyVSKcV7R/I0MAI4xd2/TWA8IkV6e9oK/u/9uazdspOhfVrzu1O6qDFdJInibSM5KtGBiBQlO2cHv3/jG8bOW0PP1g158uJ0erVumOywRCo9zSki5V5uXj4vTFjKHR/OJd/hD4O6cNmxHdSYLlJOKJFIufbZ/DX8feQc5q7K4bjOTbjtjEPVpVeknNnnpAtm9kJ4v75swhGJ7Nidx63vzuSipyexbVce9w3pxfOX9lESESmHirojOcLMWgCXmtnzwA/qEtx9fcIik0pr/IK13PruTBat3cpFR7XlD4O6ar50kXKsqETyKPAJ0IFoat3YROKhXKRU5OU7j45byJ0fzaN1owN47tI+nHBw02SHJSJF2GfVlrvf7+5dgafdvYO7t495FZlEzOxpM8s2s5kxZbebWZaZTQ+vQTHrbjazTDObZ2anxJQPDGWZZjY8pry9mU0M5a+amfqApqiJi9ZxxgPj+feoeQzo1pxRNxyvJCKSIuKamNrdrzKznmZ2bXj1iPP4zwIDCym/x917hddIADPrBgwBDg37PGxmVc2sKvAQcCrQDRgatgW4IxyrE7ABuCzOuKSc2J2XzwOfLGDoExPYsG0XD553OI9ecIQmmxJJIXElEjO7DngJaBZeL5nZr4raz90/A+JtRxkMjHD3ne6+GMgE+oRXprsvcvddRA9GDrZoXtSTiAaSBHgOODPOc0k5kJmdw08e/IK7Rs/n1MPS+PCG4zm9RwtNeSuSYuLt/ns50NfdtwKY2R3AV8ADxTzvtWZ2EZAB/NbdNwAtgQkx26wIZQDL9yjvCxwIbHT33EK2/wEzGwYMA2jTpk0xQ5bSkpuXz5PjF3P3R/M5oGZVHjm/N6d21/AmIqkqrjsSokb2vJjPeezRg2s/PAJ0BHoBK4G7inmcuLn74+6e7u7pTZuq3j2ZZqzYxJkPf8E/P5jL8Qc35aMbjlcSEUlx8d6RPANMNLO3w+czgaeKc0J3X12wbGZPAP8NH7OA1jGbtgpl7KV8HdDQzKqFu5LY7aWccXdenrSMW9+dReM6NXhg6OGc3iNN1VgiFUC8Y23dbWZjgWND0SXuPq04JzSzNHdfGT7+FCjo0fUe8LKZ3Q20ADoDk4jufDqbWXuiRDEEOM/dPQxpfzZRu8nFwLvFiUkSa9GaLdz67izGZ67luM5NePC83jSoXT3ZYYlIKdmf+UimAlP35+Bm9grQH2hiZiuA24D+ZtaL6DmUJcCV4fizzOw1YDaQC1zj7nnhONcCo4CqRF2RZ4VT3ASMMLO/AdMo5l2SJEZuXj73f5rJo+MWUrNaFf78k0O5sF9bqmiMLJEKxdw92TGUqfT0dM/IyEh2GBXe6s07+O1rXzM+cy0DujXnr2ceRvP6tZIdlogUk5lNcff0wtZp0EYpdV8uXMt1r0wjZ0cud/ysO+ceqZ5yIhVZ3InEzNoCnd39YzOrDVRz95zEhSapxt157LNF3DlqHm0aH8BLl/fjkIPqJTssEUmweKfavYLoOYzGRF13WxGNw3Vy4kKTVBI76dQphzbnX2f3VIO6SCUR7x3JNURPmE8EcPcFZtYsYVFJShk7L5sbX/+azTty+eOgrlx2bHs1qItUIvEmkp3uvqugz7+ZVSPqdSWV2KZtu/nr+7N5Y8oKuhxUjxcu60vXtPrJDktEyli8iWScmf0BqG1mPwauBv6TuLCkvBszN5ub3vyGdVt38csTOnL9yZ010KJIJRVvIhlONLLuDKLnPka6+xMJi0rKrbVbdnLzWzMYPXs1nZrV5amLj6R7qwbJDktEkijeRHI+0ci83yUPMzvd3f+7j32kgvly4Vp++9rXrN+6i9+dcgiXHdteMxeKSNyDNj4AfG5mXWPK/pKAeKQccneeGr+YC56cSK3qVXnzqqO55sROSiIiAsR/R7KYqGrrDTO73d1fp/ij/0oK+Xbjdm55Zyafzs3m5C7NuG/o4dStqedYReR78f5GcHefamYnAK+YWV+ica+kApu+fCO/fGEKm3fs5qaBXbjy+A7q1isi/yPeqq2VAO6+FjiFqOvvYYkKSpLL3XktYznnPvYVVasYb151NFf176gkIiKFincY+dNilvOB34WXVDAbt+3iuhHT+Wz+Go5s14hHLjiCJnVrJjssESnH9plIzOxed7/BzP5DIQ8guvtPEhaZlLkJi9Zx4+tfk715J7ed0Y0L+7WlWtV4b1pFpLIq6o7khfB+Z6IDkeQaMWkZt7wzk7SGtXjx8r70ad842SGJSIrYZyJx9ynhfRyAmVUnahvJcvfsxIcnibYzN487Rx71KqgAABRySURBVM3jic8Xc2ynJjx8QW/q19JgiyISv33WW5jZo2Z2aFhuAHwNPA9MM7OhZRCfJNDaLTsZ+vgEnvh8Mef3bcNTv0hXEhGR/VZU1dZx7v7LsHwJMN/dzzSzg4APgFcSGp0kzKxvN3HFcxms3bqLB887nNN7tEh2SCKSoopKJLtiln8MvA7g7qsKRgKW1DN+wVqGvZBBg9rVefOXR2usLBEpkaISyUYzOx3IAo4herq9YBj52gmOTUqZu/Pw2IXc9dE8Ojaty3OX9qFFQ32NIlIyRSWSK4H7gYOAG9x9VSg/GXg/kYFJ6dqxO49b3pnJG1NWcFqPNP5xVne1h4hIqdhnY7u7z3f3ge7ey92fjSkf5e6/LergZva0mWWb2cyYssZmNtrMFoT3RqHczOx+M8s0s2/MrHfMPheH7ReY2cUx5UeY2Yywz/2m+rZCbdq+m6FPTOCNKSu49sROPDj0cCURESk1iX7a7Flg4B5lw4FP3L0z8En4DHAq0Dm8hgGPQJR4gNuAvkTT/d5WkHzCNlfE7LfnuSq95eu38fNHv2Rm1iYePO9wbjzlEJRvRaQ0JTSRuPtnwPo9igcDz4Xl54AzY8qf98gEoKGZpRGN7TXa3de7+wZgNDAwrKvv7hPc3Ym6JZ+JfGfasg389OEvWblpB8/8oo96ZolIQiRj/Ivm7r4yLK8CmofllsDymO1WhLJ9la8opPx/mNkwM8sws4w1a9aU/ApSwIczV3HOY19Rq3oV3rrqaI7t3CTZIYlIBbXficTMSm1WxHAn8T9jeJU2d3/c3dPdPb1p06aJPl1SuTv3fjyfX744hUMOqse71xxD5+b1kh2WiFRgxbkjKfSv/v2wOlRLEd4LhlrJAlrHbNcqlO2rvFUh5ZVWfr5zyzszuffjBZx1eEve+OXRHKiRe0UkwYqTSKaV8JzvAQU9ry4G3o0pvyj03uoHbApVYKOAAWbWKDSyDwBGhXWbzaxf6K11UcyxKh1350/vzuSlicsYdnwH7vx5T02FKyJlYr/nTHX3S+Pd1sxeAfoDTcxsBVHvq38Cr5nZZcBS4Jyw+UhgEJAJbCMakgV3X29mfwUmh+3+4u4FDfhXE/UMq000ZMsH+3s9FYG78/eRc3hp4jKuPKEDwwd2Uc8sESkzFjVTVB7p6emekZGR7DBKTX6+89f3Z/PMF0s4v28b/nbmYUoiIlLqzGyKu6cXtm6/70ik/MgLbSKvTFrGL45ux21ndFMSEZEyp0SSonLz8vnj2zN5NWM5V/XvyO/1oKGIJElRU+1eFOdxprv7N6UQj8Rhx+48fvPadEbOWMXV/Tvy+4Fdkh2SiFRiRd2RtI/zOEtKGIfEKWfHbi56ehLTlm3kpoFduKp/x2SHJCKVXFGJ5O/uvrtMIpEirduyk4ufmcSclTk8fH5vBnVPS3ZIIiJFPkeSZWZPmtnJGlk3uRav3cpZj3zJgtVbePzCI5RERKTcKCqRdCV6fuMWYLmZ3RceFpQylJm9hSGPf0XOjlxevqIvJ3dtXvROIiJlpKj5SNa5+2PufiLREO6LgHvMbKGZ/V+ZRFjJLV+/jUufnUxevvPKFf04om3jZIckIvIDcQ+R4u7fAk8RzQGSA1yeqKAksnbLTs597Cs2bNvFExelc8hBGnxRRMqfIhOJmdUys5+b2VtEw5ecRDQZlSa3SKBN23Zz2XMZrNu6ixcv68vhbRoVvZOISBIU9RzJy8CPgHHAS8B57r6jLAKrzDbv2M0FT01k7qrNPHReb3q2bpjskERE9qqo7r8fAle6e05ZBCPRw4bDns9g7qrNPHbhEZzURQ3rIlK+FZVIxgIFw7fvy0Z331w6IVVeu3LzueL5DCYuXs895/RSEhGRlFBUInmuiPUQzXD4LNGc6VJM7s5Nb37D5wvWcsfPunPm4SWdP0xEpGzsM5GEbr+SYO7OXR/N5+1pWfzmxwdz7pFtkh2SiEjcijNDopSyR8Yt5MExmZyT3opfndQp2eGIiOwXJZIkGzVrFf8eNY/TeqRxx896aCh4EUk5SiRJlJm9hV+/Op0erRpy59k9lUREJCUpkSTJhq27uOy5ydSsVoVHL+hN7RpVkx2SiEixaIbEJNiZm8eVL05h5cYdvHxFX9Ia1E52SCIixaZEkgT/GDmXSYvXc9+QXqS30yCMIpLaVLVVxsbNX8OzXy7hgn5tGNxLz4qISOpLWiIxsyVmNsPMpptZRihrbGajzWxBeG8Uys3M7jezTDP7xsx6xxzn4rD9AjO7OFnXE4/M7Byue2UahzSvx/BTuyY7HBGRUpHsO5IT3b2Xu6eHz8OBT9y9M/BJ+AxwKtA5vIYRDWWPmTUGbgP6Es2Xclscw7kkxcZtu7jsuQyqV63C4xcdQd2aqlUUkYoh2YlkT4P5fliW54AzY8qf98gEoKGZpQGnAKPdfb27bwBGAwPLOuii5Oc7v3nta7I2bOfh83vT9sA6yQ5JRKTUJDOROPCRmU0xs2GhrLm7rwzLq4CCUQtbAstj9l0RyvZW/gNmNszMMswsY82aNaV5DXF59sslfDo3mz+e1pU+7dW4LiIVSzLrV4519ywzawaMNrO5sSvd3c3MS+NE7v448DhAenp6qRwzXovWbOGfH87l5C7N+MXR7cry1CIiZSJpdyTunhXes4G3ido4VocqK8J7dtg8C2gds3urULa38nIhP9/507szqVG1Cv84q7ueXBeRCikpicTM6phZvYJlYAAwE3gPKOh5dTHwblh+D7go9N7qB2wKVWCjgAFmVjBnyoBQVi68MnkZX2Su4+ZBXWhWv1aywxERSYhkVW01B94Of6FXA1529w/NbDLwmpldBiwFzgnbjwQGEc0Zvw24BMDd15vZX4HJYbu/uPv6sruMvVu6biv/HDmXvu0bc14fDQsvIhVXUhKJuy8CehZSvg44uZByB67Zy7GeBp4u7RhLIj/fuX7EdDC48+cajFFEKjY9zJAAL05cyvTlG7n33F60bnxAssMREUmo8vYcScpbv3UXd46ax9EdD2RwrxbJDkdEJOGUSErZP0bOYfvuPG7/yaGq0hKRSkGJpBSNmZfN61NWcOmx7Tm4eb1khyMiUiaUSErJ9l153PL2TDo3q8uvf3RwssMRESkzamwvJY99tpCsjdsZMawftaprtkMRqTx0R1IKlq/fxqPjFjKo+0H063BgssMRESlTSiSl4O8j5wBwy2ndkhyJiEjZUyIpoclL1vPBzFVc3b8TLRpq7nURqXyUSEro7o/m06RuDa44rkOyQxERSQolkhL4fMEavlq0jqv6d6J2DTWwi0jlpERSTO7OXR/Np2XD2pzfV4MyikjlpURSTF9krmP68o1cc2IndfcVkUpNiaSYnvliMU3q1uRnR/zPzL4iIpWKEkkxLFidw6fzsjmvT2tqVtPdiIhUbkokxfDSxGVUq2L84pj2yQ5FRCTplEj207Zdubw5ZQWndU+jcZ0ayQ5HRCTplEj203vTvyVnZy4X9Gub7FBERMoFJZL99Na0LDo1q8sRbRslOxQRkXJBiWQ/rNq0g8lL1nNa9zRNWiUiEiiR7Id3p2fhDmf1VpdfEZECFSKRmNlAM5tnZplmNjxR5/lkTjbd0urT9sA6iTqFiEjKSflEYmZVgYeAU4FuwFAzK/Xx3LftymXqsg2ccEjT0j60iEhKS/lEAvQBMt19kbvvAkYAg0v7JNOWbSQ33+nbvnFpH1pEJKVVhETSElge83lFKPuOmQ0zswwzy1izZk2xTlKjWhV+1LUZPVs1LH6kIiIVUKWYs93dHwceB0hPT/fiHOPIdo05sp3uRkRE9lQR7kiygNYxn1uFMhERKQMVIZFMBjqbWXszqwEMAd5LckwiIpVGyldtuXuumV0LjAKqAk+7+6wkhyUiUmmkfCIBcPeRwMhkxyEiUhlVhKotERFJIiUSEREpESUSEREpESUSEREpEXMv1vN5KcvM1gBLi7l7E2BtKYaTTBXlWnQd5Yuuo/wprWtp6+6FDjZY6RJJSZhZhrunJzuO0lBRrkXXUb7oOsqfsrgWVW2JiEiJKJGIiEiJKJHsn8eTHUApqijXousoX3Qd5U/Cr0VtJCIiUiK6IxERkRJRIhERkRJRIomTmQ00s3lmlmlmw5MdTwEzW2JmM8xsupllhLLGZjbazBaE90ah3Mzs/nAN35hZ75jjXBy2X2BmF8eUHxGOnxn2tVKK+2kzyzazmTFlCY97b+co5eu43cyywncy3cwGxay7OcQ0z8xOiSkv9OcrTI8wMZS/GqZKwMxqhs+ZYX27El5HazMbY2azzWyWmV0fylPqO9nHdaTid1LLzCaZ2dfhWv5c3POX1jXulbvrVcSLaHj6hUAHoAbwNdAt2XGF2JYATfYo+xcwPCwPB+4Iy4OADwAD+gETQ3ljYFF4bxSWG4V1k8K2FvY9tZTiPh7oDcwsy7j3do5Svo7bgRsL2bZb+NmpCbQPP1NV9/XzBbwGDAnLjwJXheWrgUfD8hDg1RJeRxrQOyzXA+aHeFPqO9nHdaTid2JA3bBcHZgY/v326/yleY17jbU0filU9BdwFDAq5vPNwM3JjivEsoT/TSTzgLSwnAbMC8uPAUP33A4YCjwWU/5YKEsD5saU/2C7Uoi9HT/8BZzwuPd2jlK+jtsp/JfWD35uiObQOWpvP1/hF8laoNqeP4cF+4blamE7K8Xv5l3gx6n6nRRyHSn9nQAHAFOBvvt7/tK8xr29VLUVn5bA8pjPK0JZeeDAR2Y2xcyGhbLm7r4yLK8CmoflvV3HvspXFFKeKGUR997OUdquDVU+T8dU1ezvdRwIbHT33D3Kf3CssH5T2L7EQpXI4UR/Aafsd7LHdUAKfidmVtXMpgPZwGiiO4j9PX9pXmOhlEhS37Hu3hs4FbjGzI6PXenRnxQp18e7LOJO4DkeAToCvYCVwF0JOEdCmFld4E3gBnffHLsulb6TQq4jJb8Td89z915AK6AP0CXJIRVKiSQ+WUDrmM+tQlnSuXtWeM8G3ib6YVttZmkA4T07bL6369hXeatCyhOlLOLe2zlKjbuvDr8A8oEniL6T4lzHOqChmVXbo/wHxwrrG4Tti83MqhP98n3J3d8KxSn3nRR2Han6nRRw943AGKJqpv09f2leY6GUSOIzGegcejLUIGrIei/JMWFmdcysXsEyMACYSRRbQW+Zi4nqiQnlF4UeN/2ATaFKYRQwwMwahVv+AUR1oiuBzWbWL/SwuSjmWIlQFnHv7RylpuCXYvBTou+k4NxDQu+a9kBnogboQn++wl/nY4CzC4k39jrOBj4N2xc3ZgOeAua4+90xq1LqO9nbdaTod9LUzBqG5dpEbT1zinH+0rzGwpVWo1ZFfxH1UplPVEf5x2THE2LqQNTT4mtgVkFcRHWcnwALgI+BxqHcgIfCNcwA0mOOdSmQGV6XxJSnE/2nWwg8SOk1Hr5CVMWwm6gO9rKyiHtv5yjl63ghxPlN+E+cFrP9H0NM84jpAbe3n6/wHU8K1/c6UDOU1wqfM8P6DiW8jmOJqpS+AaaH16BU+072cR2p+J30AKaFmGcCtxb3/KV1jXt7aYgUEREpEVVtiYhIiSiRiIhIiSiRiIhIiSiRiIhIiSiRiIhIiSiRSMoyMzezu2I+32hmt+/nMZ61aFTYmuFzEzNbUkrx9Tez/5bGsYo4T9MwUus0Mztuj3U3mNkBcRxjS+IilIpOiURS2U7gLDNrUsLj5BE9+1CumFnVODc9GZjh7oe7++d7rLuBaMA/kYRRIpFUlks0H/WvS3ice4FfxwwJAfzvHYWZPWhmvwjLS8zsHxbmgTGz3mY2yswWmtkvYw5T38zet2jOh0fNrErYf4CZfWVmU83s9TA2VMFx7zCzqcDP94innZl9atHAg5+YWRsz60U0DPvgEEvtmO2vA1oAY8xsTCgbatGcIDPN7I49/yHCHdlXZnZa+Pw7M5sczvnnmDjmmNkTFs2T8VHBec3sOovmAvnGzEYU7+uQVKNEIqnuIeB8M2tQgmMsA8YDF+7vfh4NqPc58CzRkBL9gD/HbNMH+BXRnBAd+f4O6hbgRx4NuJkB/CZmn3Xu3tvd9/xF/ADwnLv3AF4C7nf36cCtRHNP9HL37QUbu/v9wLfAie5+opm1AO4ATiIavPBIMzuzYHszaw68T/QE9ftmNoBoOI0+Yfsj7PtBQTsDD7n7ocBG4GehfDhweIgxNqFKBVat6E1Eyi9332xmzwPXAduL2n4f/kE0ntD7+7FPwXhrM4gmIMoBcsxsZ8EYScAkd18EYGavEA3hsYMosXwRDQ1FDeCrmOO+upfzHQWcFZZfILoT2R9HAmPdfU2I5yWiibneIZo46RPgGncfF7YfEF7Twue6RAlkGbA4JDGAKURzskA0nMdLZvZOOK5UAkokUhHcSzTpzzOFrTSzUURzXGS4++WFbePuCyya9+GcmOJcfnjXXmuP3XaG9/yY5YLPBf+39hyDyInGqRrt7kMLvRrYupfyRMolSginAAWJxIB/uPtjsRtaNM9H7PXmAQVVaqcRJaczgD+aWXf/fl4LqaBUtSUpz93XE00Netle1p8Sqn0KTSIx/g+4MebzUqBbGDW1IVGj9v7qE0ZXrQKcS1SFNgE4xsw6wXejOB8cx7G+JBqhFeB8oiq1ouQQTTkL0SB8J4R2kKpEsxQWJA0n6nDQxcxuCmWjgEtj2m9amlmzvZ0oXGNrdx8D3EQ0jHndOGKUFKc7Eqko7gKuLckB3H1WaOTuHT4vN7PXiEZeXcz3VTz7YzLRSLediIbmftvd80Oj/SsF3Y6J2kzmF3GsXwHPmNnvgDXAJXGc/3HgQzP7NrSTDA9xGPC+u383PLi755nZUOA9M8tx94fNrCvwVaiC2wJcQHQHUpiqwIuhvcqI2nA2xhGjpDiN/isiIiWiqi0RESkRJRIRESkRJRIRESkRJRIRESkRJRIRESkRJRIRESkRJRIRESmR/wdnK1o3o/TkYAAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plt.plot(N, V)\n",
"plt.xlabel('N - Number of tokens')\n",
"plt.ylabel('|V| - Size of vocabulary')\n",
"plt.title('Heaps\\' Law')"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1000 610\n",
"10000 3597\n",
"20000 5920\n",
"30000 7753\n",
"50000 10806\n",
"100000 16675\n"
]
}
],
"source": [
"for i in [1000, 10000, 20000, 30000, 50000, 100000]:\n",
" print(i, V[i-1])"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"from math import log"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1.3188109468386284\n",
"0.6824809994294719\n",
"1.3976147883124628\n",
"0.655343979207257\n",
"1.5589214096765935\n",
"0.6405478197637083\n"
]
}
],
"source": [
"print(8385/6358)\n",
"print(log(1.3188, 3/2))\n",
"print(11719/8385)\n",
"print(log(1.397614, 5/3))\n",
"print(18269/11719)\n",
"print(log(1.558921, 10/5))"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"10.340940789558791"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"11719 / 50000 ** 0.65"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0.5, 1.0, \"Heaps' Law\")"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZIAAAEWCAYAAABMoxE0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3dd3hVVdbA4d9Ko/cmHUQQsYAaimJvCBbsYsWKY++KM/qBozPqjH1QEBRFpYoNlSIgTZQSeodQE1qAFBICqev74+yMVyaQG+69uSnrfZ775Jx92jremMXZe5+9RVUxxhhjjlVEuAMwxhhTtlkiMcYYExBLJMYYYwJiicQYY0xALJEYY4wJiCUSY4wxAbFEYowxJiCWSIwBRGSLiFxyWNldIvJruGJyMRzxRa/CYjYmHCyRGGOMCYglEmP8JCJNRORrEdkjIptF5DGfbV1E5HcRSRWRnSIySERifLariDwmIptEZK+I/FtEIty2E0RkloikuW1jA4yzjoj86OJMccvN3LYLRWSFz75TRWShz/ocEbkmkOubiscSiTF+cH/0fwCWAU2Bi4EnRKSH2yUPeBKoD5zltj902GmuBWKBM4DewD2u/BXgZ6AO0Az4T8EBqirHEG4E8CnQEmgBHAQGuW3zgLYiUl9EooHTgCYiUkNEqrj45hzDNU0FFhXuAIwpRb4TkVyf9RhgsVvuDDRQ1b+79U0iMgzoA0xR1UU+x20RkY+A84F3fcrfUNVkIFlE3gVuAT4GcvD+6DdR1UQgoHYZVd0HfF2wLiL/AGa4bQfdE8h5wA68xJgKdAeygA3ueGP8Zk8kxvzhGlWtXfDhz08ULfH+5Z5a8AH+CjQCEJF2rgppl4jsB/6J93TiK8FneSvQxC0/BwiwQERWicg9BEBEqorIRyKy1cUyG6gtIpFul1nABXjJZBYwEy/pne/WjSkWSyTG+CcB2OybaFS1hqr2ctsHA2uBtqpaEy/JHF4t1dxnuQXeEwGquktV71fVJsADwIcickIAsT4NnAh0dbGc58oL4jk8kczCEokJgCUSY/yzAEgXkedFpIqIRIrIKSLS2W2vAewHMkSkPfBgIed41jWENwceB8YCiMiNBY3hQAqgQL6fcUWLSGWfT5SL5SCQKiJ1gQGHHfMbXqLpAixQ1VV4T1xd8Z5ejCkWSyTG+EFV84ArgU7AZmAvXvtGLbfLM8CtQDowDJckDvM9sAhYCvwEfOLKOwPzRSQDmAA8rqqb/AxtIl7SKPgMxGuXqeJinAdMPuxeDuC1/axS1WxX/DuwVVWT/LyuMf8lNrGVMaHnXixsq6rx4Y7FmGCzJxJjjDEBsURijDEmIFa1ZYwxJiD2RGKMMSYgFe7N9vr162urVq3CHYYxxpQpixYt2quqDQrbVuESSatWrYiLiwt3GMYYU6aIyNYjbbOqLWOMMQGxRGKMMSYglkiMMcYExBKJMcaYgFgiMcYYExBLJMYYYwJiicQYY0xALJEYY0x5l5kMc9+DLQHN4nxEFe6FRGOMqTB2LIWFw2DFeMg9BN2fgFbnBP0ylkiMMaY8yc2C1d/DgmGQuACiq0LHW6DL/dDo5JBc0hKJMcaUB2mJEPcpLB4BB/ZA3TZw+eteEqlSO6SXtkRijDFllSpsmQMLhsLaiaD50O5y7+nj+AshIoJt+zKZvHAjk1bu4qrTmnDPOa2DHoYlEmOMKWuy0mHZGFj4MexZC1XqwNmPQOw9UKcViSmZjJsez7TVu1m9cz8ApzStSd1qMSEJxxKJMcaUFXvWeclj6WjITofGnaD3h3DKdWRqNFNX7+b77xcyY10SApzZsg4v9GxPz1Ma06Je1ZCFZYnEGGNKs7xcWD/Zq77aPAsiY+Dka6FLPw7U78ic+H38MG4NM9clcSA7j/rVY3jkwhPo06UFTWtXKZEQLZEYY0xplJkMS77wel+lJUDNpnDRS2R3vINFe6P4ceEOvl/6CxlZudSrFsNVHZtwxWmNObtNfSIjpERDtURijDGlye7VsOAjWDYWcg9Cy3Ogxz/YXP8Cvl++m1GDVpCUnkVMVARXnNqYG85sRpfWdYmODN/75ZZIjDEm3PLzYP0UmD8YNs+GqMpw6o3kxN7PpL0NGPXrVuZt8t5KP69dA165pgVntalHzcrRYQ7cY4nEGGPC5WAqLPnSa/9I3epVX108gPjm1zNqxQG+G76d5AOJtKxXlWcua8e1ZzQrsXaP4rBEYowxJW3POpj/ESwbDTmZ0OJsci9+man5sXw6L5EFP60kQuDSDo244czmXNS+YYm3exSHJRJjjCkJ+fkQPxXmDYZNMyCyEpx6I6mn3s3YxDqMnZLApr3LaVijEi9ecRJXd2pCwxqVwx21XyyRGGNMKB1Kg6WjvCeQlM1QozF64Yv8Ur0XX689xLThSWTn7aZj89p8eNsZXNqhUVgbzo+FJRJjjAmFvRu8to+loyA7A5p3ZUfss4xL78S3C5LYum8L9avHcGvXFtzatQXtGtUId8THzBKJMcYES34+bPzF630VPw0iY8g/+Trm1b+BD9fX5Ncf9iKyhc4t6/LYRW3p3akJUWXs6aMwlkiMMSZQ2ZmwfIzX/rF3PVRvRGKnJxmWeT7fLs9m/6FsjquZwXOXn8hNsc2pX71SuCMOqpAmEhGpDMwGKrlrjVfVASLSGhgD1AMWAXeoaraIVAI+B84E9gE3q+oWd64XgHuBPOAxVZ3iyi8H3gMigY9V9fVQ3pMxxvxX+i7vzfO44XAwGT2uI4vPfJ23d5zM3Hnp1KyczUXtG3JVxyac365BuXj6KEyon0iygItUNUNEooFfRWQS8BTwjqqOEZEheAlisPuZoqoniEgf4A3gZhHpAPQBTgaaANNEpJ27xgfApUAisFBEJqjq6hDflzGmItu5DH7/EFZ+Dfm5HGxzOV/HXM276xuwd0s2TWvn8uIVJ3Fb15ZUiYkMd7QhF9JEoqoKZLjVaPdR4CLgVlc+AhiIl0h6u2WA8cAgERFXPkZVs4DNIhIPdHH7xavqJgARGeP2tURijAmu/Hxv8MR5H8KWOWh0Nba2vplh2ZcxZk00qsoFJ9bm1i4tuKh9QyJK8XsfwRbyNhIRicSrvjoB7+lhI5Cqqrlul0SgqVtuCiQAqGquiKThVX81Beb5nNb3mITDyrsWEkM/oB9AixYtAr8pY0zFkZXhuu8OhuRN5FZvwqzmjzIg4UwSV8VQr1oM953TjNu7taR53dAN1V6ahTyRqGoe0ElEagPfAu1Dfc1CYhgKDAWIjY3Vkr6+MaYMSkv03v1YPAIOpZFc5zRG1OrPoN0nI8lRXNqhEa92bs65bRuU6rfOS0KJ9dpS1VQRmQGcBdQWkSj3VNIM2O522w40BxJFJAqohdfoXlBewPeYI5UbY0zxJS6CeR+gq74DlMXVzuXfeRczb+cJtKxXlScubcZ1Z5bOMa/CJdS9thoAOS6JVMFrFH8DmAHcgNdzqy/wvTtkglv/3W3/RVVVRCYAo0TkbbzG9rbAAkCAtq4X2Ha8BvmCthdjjPFPXi6s/dFr/0iYT3ZUdX6IuYp39l9IZkRTrjizMY+dehzdWterUG0f/gr1E0ljYIRrJ4kAxqnqjyKyGhgjIq8CS4BP3P6fAF+4xvRkvMSAqq4SkXF4jei5wMOuygwReQSYgtf9d7iqrgrxPRljyousdFj8hdf+kbqN5JgmDOUuvsw4h9ZNG/PAhc254YxmFaLnVSDE61hVccTGxmpcXFy4wzDGhNP+nbDgIzRuOHIojbUxHXgn4zJmSRcuPaUJd53dkjNa1MHrNGoARGSRqsYWts3ebDfGVBxJa+C3QejysWh+HrMiuvJeVk92VzqFu3q04o3OzaldNSbcUZY5lkiMMeWbKmyZQ96v7xG5cRpZUokxORcwPK8XLduewn2xzeh5SuMK3/MqEJZIjDHlU14uuvo7Mme8Q7XklaRSi09zbuSXGlfS69yTGXNmMxrXsp5XwWCJxBhTruih/eyZ/QkxcUOonb2LXfmNGUE/Dp10A1fHtuGpNtbzKtgskRhjyoW0pG1smPAWJyZ+RUMOsDC/Pb8f9xDHdb6G509rSrVK9ucuVOy/rDGmTNu0Oo606W9zyt7JnE4+i6qew77T+tH53B50LmfDtZdWlkiMMWWO5uezdv5ksma9Q6dDCzioMcytdSVNej1Nl/Ydwx1ehWOJxBhTZmzds5/VM8bQeu1QTsrfwD5qMa/Vg7S74nEuaNA43OFVWJZIjDGl3pbdycz7bjBddnxBT9nJrsjGzOvwIh2vfIhuVauFO7wKzxKJMabUStiZxMof3uP07SPpIynsqHoi+y58meNib+C4CBu2pLSwRGKMKXVWb9hI4uS36br3W3rKATbWOJOUHs/T5JTLwIYtKXUskRhjSgVVZeGSJaROf4fzMibRnlzW1jmfnJ7P0+bEs8MdnjkKSyTGmLA6lJPHrNkzqLzgP3Q/NBuVCDY0voIWV/WnQ9MO4Q7P+MESiTEmLFIysvjl5+9otGIIPXQxmVRmw/F30OrKZ+lQr3nRJzClhiUSY0yJStqfydyfRtJ63VCuZz37I2qz5bSnaNnjMU6qWifc4ZljYInEGFMi1u9M4ddvh3DOri+4NmI7e6KOY2fXV2h8/n3UjKka7vBMACyRGGNCJj9fidu4kzWTh3LBnpHcE5FEUrU2JJ0ziIbdboFI+xNUHvj1LYrIo8CXqpoS4niMMeWAqjJ+3np2zhjKjVnf0EWS2VXzJNIu/jcNO14NERHhDtEEkb//HGgELBSRxcBwYIpWtDl6jTFFys9XpizewPZpg7jm4LfUl/3sqXcmhy4dynHtL7F3QMopvxKJqr4oIi8BlwF3A4NEZBzwiapuDGWAxpjSLzM7l5/mryZz9iCuyf6BWpLJ7obdye/1Nxq07h7u8EyI+f186Z5AdrlPLlAHGC8i/ypsfxFpLiIzRGS1iKwSkcdd+UAR2S4iS92nl88xL4hIvIisE5EePuWXu7J4EenvU95aROa78rEiYpMtG1OCMrNz+XTKPMa/dg89p11K35yxHGxyFnn3zaDRwxOJsCRSIfjbRvI4cCewF/gYeFZVc0QkAtgAPFfIYbnA06q6WERqAItEZKrb9o6qvnnYNToAfYCTgSbANBFp5zZ/AFwKJOJVsU1Q1dXAG+5cY0RkCHAvMNjfmzfGHJu9GVn8NGcBleYP4hadTozkkdz6Cqr1fIHjGp0c7vBMCfO3jaQOcJ2qbvUtVNV8EbmysANUdSew0y2ni8gaoOlRrtEbGKOqWcBmEYkHurht8aq6CUBExgC93fkuAm51+4wABmKJxJiQyczO5aufZ1N1wfvcIrOJEEhtdz31L+9P/Xptwh2eCZMiE4mIRAJ9VHVgYdtVdY0f52gFnA7MB7oDj4jInUAc3lNLCl6SmedzWCJ/JJ6Ew8q7AvWAVFXNLWR/Y0wQ7c3IYsLUGTRePojb838lLzKazJPvoPYlz1C/tr2FXtEVmUhUNc+1T7RQ1W3FvYCIVAe+Bp5Q1f0iMhh4BVD38y3gnuKet5gx9AP6AbRo0SKUlzKmXIlPSufbKb/QYcMQ7pLfyY6oRNIp99H48meJqdEo3OGZUqI4VVurRGQBcKCgUFWvPtpBIhKNl0RGquo37pjdPtuHAT+61e2A7z9tmrkyjlC+D6gtIlHuqcR3/z9R1aHAUIDY2FjrtmzMUagqi7elMmn6L3TcPJSnI+aTE1WZ1I4PUfeSp2lcrV64QzSljL+J5KXinlhEBPgEWKOqb/uUN3btJwDXAivd8gRglIi8jdfY3hZYAAjQVkRa4yWKPsCtqqoiMgO4ARgD9AW+L26cxhhPfr4yf3MyE36eSvcdw/lrxAJyo6twqPNjVD3vcSpZAjFH4O97JLOO4dzdgTuAFSKy1JX9FbhFRDrhVW1tAR5w11jl3k1Zjdfj62FVzQMQkUeAKUAkMFxVV7nzPQ+MEZFXgSV4icsYUwyHcvIYF5fA1Bkz6HNwFK9FLiA7phq5XZ8k5pxHialaN9whmlJO/HlBXUS6Af8BTgJi8P6gH1DVmqENL/hiY2M1Li4u3GEYE3ZZuXmMnr+NaTOmc1vWGHpGLiQnqjp0fYDo7o+AJRDjQ0QWqWpsYdv8rdoahFel9BUQi/dOSbujHmGMKZWycvP4etF2pv7yM30yR/NlZBy5lavD2c8T3e1BqGJDuZvi8XvoTVWNF5FIV930qYgsAV4IXWjGmGDKycvn+6U7mDx1MjcdGMWnkYvIqVwDPft5oiyBmAD4m0gy3fAjS92QKDspxvAqxpjwSTuYw+e/bWHBvFnceWgUH0cuIqdyTbT7C0R3/QtUqR3uEE0Z528iuQOvXeQR4Em87rjXhyooY0zgsnLz+GDGRubMncN9eWN4NHIBOZVrkH/2C0Sf9SBUrhXuEE054W+vrYKhUQ4CL4cuHGNMoDKychk9fxtT5vzKbYfG8ETkb2jlqnDWs0Sf9bBVYZmgO2oiEZEVeN10C6WqpwU9ImPMMYlPyuDz37fwW9xi+ul4xkbNgZgYIro9Bmc/DvYeiAmRop5ICh2Q0RhTeixNSOW9aetZs24tj0V/x4DImUhkFBGd/wLnPAnVG4Y7RFPOHTWRHD7arzGmdFBVZq7bw0ezN7Jx0yaeqPwDw6pMJ1IUOeMuOO8ZqNkk3GGaCsLf+UjS+aOKKwaIpoy+kGhMWZaXr8xcl8T70zeQkJjAU1Un0qfqz0RqDtLpVjjvWajTMtxhmgrG38b2GgXLbgyt3kC3UAVljPmz/Hxl8qpdvPXzOvbs2c1T1X7mtmoTico7iJx2M5z/HNh8ICZM/H4hsYCbcvc7ERkA9C9qf2PMsVNV5mzYy1tT17M+YRdP1ZzBXTW+IzonHU6+Fi54ARqcGO4wTQXnb9XWdT6rEXjDpBwKSUTGGFSVufH7+PeUtaxO3Md9Vecwpta3VMnaC+0uh4tehONODXeYxgD+P5Fc5bOcizdqb++gR2NMBZeXr/yyNokhszayeOs+7qy2kC/rfk2NzERodDZcMhJaWK2yKV38bSO5O9SBGFORFbSB/HvKOjbvzeC66qsYVn88dTPWQ41T4dp34YRLQCTcoRrzP/yt2joeeA+vgV2B34EnVXVTCGMzpkJYsi2F1yauZcGWZK6pu43xTcZRL3kxRLeG6z+Bk6+DCBvazpRe/lZtjQI+wJvRELwh5UcDXUMRlDHlXX6+MnvDHobP3cKcDXvoWmUHvzafQLM9syHiOLjibTjjToiMDneoxhTJ30RSVVW/8Fn/UkSeDUVAxpR3v23cy6s/rmH1zv2cXj2FSc0mcOKen5H0mnDJQOjyAMRUDXeYxvitqLG2CqZImyQi/fHmRlfgZmBiiGMzplzZsvcA/5qylokrdnFq7SymnTiZNglfIynR3lAm3R+zARVNmVTUE8kivMRR0ML3gM82xSa2MqZI63en8+nczYyLS6RWZBaj2v7GWbtGIglZcEZf72XCGseFO0xjjllRY221LqlAjClv1u7az3vTNjBp5S6qRClvHb+Uq1JGEJmQBCdd7VVj2dvophzw+812ETkF6ABULihT1c9DEZQxZdnqHfv558Q1/Bq/l2oxEbzTcQdX7/mIyMQN0Lwb9BkJzbuEO0xjgsbf7r8DgAvwEslEoCfwK3DURCIizd0+jfCqwoaq6nuu7WUs0Arv5cabVDXFjeP1HtALyATuUtXF7lx9gRfdqV9V1RGu/EzgM6CKi+1xN4yLMSUqPimDf09Zy5RVu6lVJZo3z8qm954hRK+bB/VOgJtHQvsr7F0QU+74+0RyA9ARWKKqd4tII+BLP47LBZ5W1cUiUgNYJCJTgbuA6ar6umvE7w88j5eg2rpPV2Aw0NUlngF4Q7OoO88EVU1x+9wPzMdLJJcDk/y8L2MCdiArl3enrWf43C1UjorgpbOrcPuBT6m0ZAJUa2BdeU25528iOaiq+SKSKyI1gSS8eduPSlV3AjvdcrqIrAGa4g2vcoHbbQQwEy+R9AY+d08U80Sktog0dvtOVdVkAJeMLheRmUBNVZ3nyj8HrsESiSkBOXn5jF6wjfenx7M3I4u7O9XguaoTqLL0My9pnP88nP0oVKpR5LmMKcv8TSRxIlIbGIbXkysD7+12v4lIK+B0vCeHRi7JAOzCq/oCL8kk+ByW6MqOVp5YSPnh1+4H9ANo0aJFccI2plDrdqXz3NfLWZaQytktqvJ9xyU0XfkRZGd4Tx8XvGA9sUyF4e9YWw+5xSEiMhnvKWC5vxcRkerA18ATqrpffOqIVVVFJKRtGqo6FBgKEBsba+0n5phlZufyyo+rGbswgWqVIvmq+3ZiN7yHLEqEE3vBxQOgYftwh2lMifK3sf1a4BdVTVPVLa7K6RpV/c6PY6PxkshIVf3GFe8WkcaqutNVXSW58u38ucqsmSvbzh9VYQXlM115s0L2NybofovfywvfriAhOZO/npbBXfuHErUoDo47Da4dAq3PDXeIxoSFvyPBDVDVtIIVVU3Fa/w+KtcL6xNgjaq+7bNpAtDXLfcFvvcpv1M83YA0VwU2BbhMROqISB3gMmCK27ZfRLq5a93pcy5jgmJ76kEeG72EWz+eT/28JOa1G8196/oRlZ4AvT+AfjMtiZgKzd82ksISjj/HdgfuAFaIyFJX9lfgdWCciNwLbAVuctsm4nX9jcfr/ns3gKomi8grwEK3398LGt6Bh/ij++8krKHdBEn6oRw+m7uFD2bGU1kPMbrNLLrtHo1sx5sbvfsTUKl6uMM0JuzEn1cuRGQ4kIo3AjDAw0BdVb0rdKGFRmxsrMbFxYU7DFOK5ecrIxds492p60k5cIgBzZZye+bnRGYmwSk3eG+k1y6y06Ix5YqILFLV2MK2+ftE8ijwEt5LhABT8ZKJMeXKqh1p/N/3q1i0NYW7miTwXK0RVN27Gpp1hltGQfPO4Q7RmFLH315bB4D+7qVCVdWM0IZlTMlKP5TDsDmbGfTLBtpX2suvLb+j2e5foGYzb3KpU663N9KNOQJ/e22dijfUSV23vhfoq6orQxibMSGnqnwVl8g/J60hOzOdj5pM45K08UhyNFz0Ipz1CERXCXeYxpRq/lZtfQQ8paozAETkArz3Ms4OUVzGhNzyxFTemLyWufF7eaLhMh6qPIKY5N3Q8RavHcReKDTGL/4mkmoFSQRAVWeKSLUQxWRMSKVl5vDBzHg+nrOJblUSmddoFMelLYXGneCWL21kXmOKyd9EsklEXgIKptu9HdgUmpCMCY1DOXkMn7uZYbM3oZnJjGoyka4pPyA5deHq/0Cn2yHC31erjDEF/E0k9wAvAwVvps92ZcaUeqrKnA17+b/vV5KwL53/a7yA26I/Jyolw5sf/YL+UKV2uMM0pszyN5G0VtXHQhqJMSGwcnsar01aw9z4fVxZaxM/Nh5J9ZQ10Po86PkvaHhSuEM0pszzN5G8JSLHAeOBsdZby5R2iSmZvDF5HT8s28GJVfYzrcW3nJA0BSo3h5s+96a6te68xgSFv++RXOgSyU3AR25OkrGq+mpIozOmmPLylVHzt/LapLVEaA6fnTif83d8giTnw/n9ofvjEFM13GEaU674PWe7qu4C3heRGcBzwP8BlkhMqbFxTwZPj1vG0oRU7mm+i/75Q4nZuhba9YSer0OdVuEO0Zhyyd8XEk8CbgauB/bhDZXydAjjMsZvGVm5fDgjnk9+3cxxURnMOOFHWid+B7WaQ5/R0L5XuEM0plzz94lkODAG6KGqO0IYjzHFMmv9Hv76zQp2ph3gH80XcXPap0TsOADnPAXnPQMx9rqTMaHmbxvJWaEOxJjiSEjOZOCEVUxfm8RldXYxpekIqictg1bnwhVvQYMTwx2iMRWG320kxpQGuXn5vP9LPB/N2kgNyWTC8ZM5ded4JKI+XPcxnHqD9cYypoRZIjFlxqKtyTw9bhlb9h1gYOu13JE2hMid+6Dz/XDhX+2lQmPC5KiJRES+UNU7RORxVX2vpIIyxte+jCzemLyWcXGJdKyeyqJWo6m3cw40OQNu/wqanB7uEI2p0Ip6IjlTRJoA94jI58Cf6gx8prs1JuiycvMYNnsTQ2ZtIjsnm0/azueiXZ8gyRHeW+md74OIyHCHaUyFV1QiGQJMB44HFvHnRKKu3Jig27Qng2fHL2fR1hTuPT6NZ7M/oHLCSu+dkCvehFrNwh2iMcY5aiJR1ffxXkIcrKoPllBMpgLLz1fem76BIbM2Uisqm6kdptN28xdQrYENbWJMKeVv998HRaQjcK4rmq2qy0MXlqmIEpIz+dt3K5m9fg/PttnGA+mDiNqUCGfe7U00ZY3pxpRKfk2+ICKPASOBhu4zUkQe9eO44SKSJCIrfcoGish2EVnqPr18tr0gIvEisk5EeviUX+7K4kWkv095axGZ78rHikiMf7dtSpP8fOXtqeu55O1ZbNq8iV9afcHD2/sTFVMN7p4MV71rScSYUkxUteidRJYDZ6nqAbdeDfhdVU8r4rjzgAzgc1U9xZUNBDJU9c3D9u0AjAa6AE2AaUA7t3k9cCmQCCwEblHV1SIyDvhGVceIyBBgmaoOPlpMsbGxGhcXV+Q9m5KxYXc6L3yzgrityQxosYK+aUOIyM2Ec5+Bc56AqErhDtEYA4jIIlWNLWybv++RCJDns57HYT24CqOqs0WklZ/X6A2MUdUsYLOIxOMlFYB4Vd0EICJjgN4isga4CLjV7TMCGAgcNZGY0iEvX/l07mb+NWUdLaNS+b3FaBonzYLmXb3ZCu3NdGPKDH8TyafAfBH51q1fA3wSwHUfEZE7gTjgaVVNAZoC83z2SXRlAAmHlXcF6gGpqppbyP5/IiL9gH4ALVq0CCBsEwxrdu6n/zcrWJaQwoCmi+mbPpSI5Fzo8Rp0fcC69BpTxvjb2P62iMwEznFFd6vqkmO85mDgFbzuw68AbxHiaXtVdSgwFLyqrVBeyxxZTl4+b/68jo/nbKZdpRTmN/+SRnt+g5bdvaeQem3CHaIx5hgUZz6SxcDiQC+oqrsLlkVkGPCjW90ONPfZtZkr4wjl+4DaIhLlnkp89zelzJa9B3ju6+Us3LyXt45fwrV7hiCpQPwxtpUAABkjSURBVK83IfZeiPCr34cxphQq8bG2RKSxqu50q9cCBT26JgCjRORtvMb2tsACvLaYtiLSGi9R9AFuVVV1k2zdgDfEfV/g+5K7E+OPQzl5fDgjnsGzNnJC1F7mN/2ChjsWQOvz4er3bbIpY8qBkCYSERkNXADUF5FEYABwgYh0wqva2gI8AKCqq1wvrNVALvCwqua58zwCTAEigeGquspd4nlgjIi8CiwhsHYbE2Qrt6fxwBeL2JF6gH+3XMD1+4Yh6VFw5btw5l32YqEx5YRf3X8BRKQl0FZVp4lIFSBKVdNDGl0IWPff0FNVPvl1M/+avI6Tqu3n0zqfUXf3b9DmYrjqPajdvOiTGGNKlYC7/4rI/Xi9nuoCbfDaI4YAFwcrSFM+7D+Uw/PjlzNp5U7+2mwF96V/6PXIuvId7w11ewoxptzxt2rrYbx3OuYDqOoGEWkYsqhMmbQ8MZVHRi0hI2UX05uPp82ead57IdcMth5ZxpRj/iaSLFXNFvevSRGJwmvjMIbcvHw+nLmRd6et5+oqy/l3rY+J3pfmjY919mP2Xogx5Zy/iWSWiPwVqCIilwIPAT+ELixTVsQnpdP/6xWs2bqDLxp+Q/f9P0GtU+Da7+G4U8IdnjGmBPibSPoD9wIr8HpZTVTVYSGLypR62bn5DJ65kUEzNnB29AYW1B1K1fQdcM6TcMELNkaWMRWIv4nkNrxxsP6bPETkSlX98SjHmHJq3qZ9vPLjatbuSOE/TabSM+VLpFIL6DMJWnQLd3jGmBLm7+vE/wHmiMhJPmV/D0E8phTLys3jtUlr6DN0HjHpCcQ1eYteyZ8jp/WBv/xqScSYCsrfJ5LNeFVb40VkoKp+hR+j/5ryY0fqQf7y5SKWJ6bxetu13Jz0DnJA4PpP4NQbwh2eMSaM/E0kqqqLReR8YLSIdMV7y9yUc6rK1NW76f/NCqJy0pnb7juabvve69Z73TCo0zLcIRpjwszfRLITQFX3upkL3wCsS045l5Wbx9++Xcn4RYn0qrudd6sNIiYhAc7vD+c9C5ElPlSbMaYU8ncY+St8lvOBZ93HlFO70g7x4MhFLN2WzBftfuOcxKFIjcZw10RoeVa4wzPGlCJHTSQi8q6qPiEiP1DIC4iqenXIIjNhoap8FZfIPyetoUpOCgtajKDBtrlw8rXeYIs2d7ox5jBFPZF84X6+edS9TLmw/1AOr/ywmq8WJXJH4+0MyH6TqH2pNlqvMeaojppIVHWR+zkLQESi8dpGtqtqUujDMyVlw+507v88jq37MvjshLmcv30oUqcl3D4eGp8W7vCMMaXYUd8jEZEhInKyW64FLAM+B5aIyC0lEJ8pAbPX7+GGIb8TeSiZJW2GcUHiYKTD1dBvliURY0yRinoh8VyfSaTuBtar6qnAmcBzIY3MhFxevvL6pLXcOXwBF1TZyJQqf6P2rt/hirfghk+hcs1wh2iMKQOKaiPJ9lm+FPgKQFV3idWXl2lpB3N4YswSZqxL4oPWv9Fr1xCkdnO4dSo06RTu8IwxZUhRiSRVRK7Emyu9O97b7QXDyFcJcWwmRBJTMuk7fAF7k5OZ2Wo0rXb+DCddBb0/gMq1wh2eMaaMKSqRPAC8DxwHPKGqu1z5xcBPoQzMhMbMdUk8PW4Zx+Vt57f6g6i2eyNc8jJ0f9x6ZRljjklRvbbWA5cXUj4FmBKqoEzwqSqf/baFV35czR111/B/Oe8SmR0Nt38DbS4Md3jGmDLM39F/j4mIDBeRJBFZ6VNWV0SmisgG97OOKxcReV9E4kVkuYic4XNMX7f/BhHp61N+poiscMe8L9ZwU6icvHyeHreMv/+wkncaTuLlA68QWe946DfTkogxJmAhTSTAZ/zvE01/YLqqtgWmu3WAnkBb9+kHDAYv8QADgK5488YPKEg+bp/7fY77n6enii7tYA73jYhj2pJ1TG88mN5pX0DHW+GeKTbgojEmKEKaSFR1NpB8WHFvYIRbHgFc41P+uXrmAbVFpDHQA5iqqsmqmgJMBS5322qq6jxVVbz3W67B/FdCciY3DP6N7fHL+bXuPzg+bT70ehOu+RCira+EMSY4ip1IRCTQWREbqepOt7wLaOSWmwIJPvslurKjlScWUm6ARVuT6fX+HI7fP5/J1QZSkwPQ90focr81qhtjgupYnkiC9sfaPUn8z2CQwSYi/UQkTkTi9uzZE+rLhd3I+VvpM/R37on+mSHyOlF1WsD9v9iovcaYkDiWRLIkwGvudtVSuJ8FY3ZtB5r77NfMlR2tvFkh5f9DVYeqaqyqxjZo0CDA8EsvVeXNKesY8O1SPqozkidzPkba9YB7rT3EGBM6xU4kqnpPgNecABT0vOoLfO9TfqfrvdUNSHNVYFOAy0SkjmtkvwyY4rbtF5FurrfWnT7nqnBUldcnr+XLGUuYXO8dLsr4Cc55Em4eCZVqhDs8Y0w5FtIp7kRkNHABUF9EEvF6X70OjBORe4GtwE1u94lALyAeyMQb2wtVTRaRV4CFbr+/q2pBA/5DeD3DqgCT3KfCUVXembqen2fPZVrNd6h3KAmu/Qg69gl3aMaYCkC8ZoqKIzY2VuPi4sIdRtDk53tPInFzJvNF1XeoWika6TMKWnQNd2jGmHJERBapamxh22zS7TIsL1958buVpMSNZ2zlD4mq1Ry5fTzUPT7coRljKpCiptq908/zLFXV5UGIx/gpNy+f/t+soObSYfwz5kto2hm5ZQxUqxfu0IwxFUxRTySt/TzPlgDjMMVwICuXJ0Yvolv8O9wbPQnaXwnXf2wvGRpjwqKoRPJPVc0pkUiMX5IPZHPXsDk8sO91rohaAF0fhB7/gIjIcIdmjKmgikok20VkAjAa+EUrWst8KZOYkskDH8/kxfRXOStyFfT4J5z1cLjDMsZUcEW9R3ISXrfbF4EEEXnPveNhStjyxFT6DprC6xkv0i1yLVw71JKIMaZUOGoiUdV9qvqRql6IN/LuJuAdEdkoIv8okQgNi7el8OTHk/g4/yVOjkpE+oyEjjeHOyxjjAGK8Wa7qu4APsEbuj0duC9UQZk/rEhM4+XPfuBLeYmW0SlE3P41nNgz3GEZY8x/FZlIRKSyiNwoIt/gvXV+Ed4cIk1CHVxFF5+UwcBhYxme/xINK+URcdeP0PrccIdljDF/UtR7JKOAS4BZwEjgVlU9VBKBVXRb9h7g1Y/H8om8TPVqNYi8awI0aBfusIwx5n8U1WtrMvCAqqaXRDDGs3XfAV4aMpJBuQOpUq0WUff+ZG+rG2NKraISyUygYNTdo0lV1f3BCali25F6kFeHjuKD3IFUqV6b6Ht+grr+vhdqjDElr6hEMqKI7eBNTPUZ3lS3JgD7MrJ4efDnvHVoIJVq1CX63ok2j4gxptQ7aiJx3X5NCTiQlcvfP/qSNw8NIKZGfSrdNxFqtwh3WMYYUyQb/bcUyMjK5dXh43l5/0tEVa9HpfsnQ61mRR9ojDGlwLFMtWuCKC9fee3LH3l613NUqlyNKvf+aEnEGFOm2BNJmA36dgYPbnuaGjERVL73B2tYN8aUOZZIwmji70u5atmD1I86SOW7J0LD9uEOyRhjis0SSZgsWLeVlpP60iQyhag7v4MmncIdkjHGHBNrIwmDTbtSyBl9B+0jtpF3/QiiWp0d7pCMMeaYWSIpYbm5eWz89D66s4y0i/9NtVNsAEZjTNkWtkQiIltEZIWILBWROFdWV0SmisgG97OOKxcReV9E4kVkuYic4XOevm7/DSLSN1z3469ZHz/DpVnT2HDSQ9Q91wZQNsaUfeF+IrlQVTupaqxb7w9MV9W2wHS3DtATaOs+/fCGskdE6gIDgK5486UM8GM4l7CZNe49Lt41nGX1rqDtTf8MdzjGGBMU4U4kh+vNH8OyjACu8Sn/XD3zgNoi0hjoAUxV1WRVTQGmApeXdND+WDJ3Ct1W/Z01lU/nlL98BiLhDskYY4IinIlEgZ9FZJGI9HNljVR1p1veBTRyy02BBJ9jE13Zkcr/RET6iUiciMTt2bMnmPfgl12Jm2j2cz/2RdSn1V++IjI6psRjMMaYUAln999zVHW7iDQEporIWt+NqqoiosG4kKoOBYYCxMbGBuWc/srPyiTjs5tozCHSbvmWKrUblOTljTEm5ML2RKKq293PJOBbvDaO3a7KCvczye2+HWjuc3gzV3ak8tJBlc2f3ccJuRtY2vlfNGl3RtHHGGNMGROWRCIi1USkRsEycBmwEpgAFPS86gt875YnAHe63lvdgDRXBTYFuExECuZMucyVlQp7Zw2lzc6f+LrmHZx9xZ3hDscYY0IiXFVbjYBvxWtwjgJGqepkEVkIjBORe4GtwE1u/4lAL7w54zOBuwFUNVlEXgEWuv3+rqrJJXcbR5a/Yzk1Z/6NuXoaZ939BmKN68aYciosiURVNwEdCynfB1xcSLkCDx/hXMOB4cGOMSBZ6aR9cRvZWo2dF79H9zrVwh2RMcaETGnr/lsu7P/6MWpmJjC6+QCuP/f0cIdjjDEhZYkkyHTlN9Rc/w1DI27kjj63WZWWMabcs9F/gyl9N9nfP8ma/OOp2+sF6lWvFO6IjDEm5OyJJFhUOfTto2jOAYbVe54bu9gEVcaYisESSbAsH0vlTVN4J78PT996JRERVqVljKkYrGorGDKTyZ74Aivy21Lnosc5vkH1cEdkjDElxp5IgiB36stEZqUypPrD3Htum3CHY4wxJcoSSaAS44hcMoLPcntw13VXER1p/0mNMRWL/dULhCrZPz5LktZmdbuH6X5C/XBHZIwxJc4SSSDW/EDMrsW8m3cjT10dW/T+xhhTDllj+7HKyyX754Fs1aZU6Xw7TWtXCXdExhgTFvZEcqyWjSYmdSP/4RYevKh9uKMxxpiwsSeSY5GfT9ast9mQ34pW3W+kQQ17g90YU3HZE8mxWPcTldI2MZze3HPu8eGOxhhjwsqeSI5B9ux32aUNqdbpWmpXtfnXjTEVmz2RFNfu1cTsjGNE3mXcf/6J4Y7GGGPCzp5Iiilv0efkE8We1tfSol7VcIdjjDFhZ4mkOPJyyV06hml5Z3DdOaeFOxpjjCkVrGqrOBLmUyk7hd+rnM95bRuEOxpjjCkV7ImkGDJX/kiURlL31MttmHhjjHEskRRD5rpfWJ9/Ild1sUZ2Y4wpUC6qtkTkchFZJyLxItI/JBfJOUjt9A1sq3oybRvVCMkljDGmLCrziUREIoEPgJ5AB+AWEekQ7Osc3LmGKPKIaX56sE9tjDFlWplPJEAXIF5VN6lqNjAG6B3si2zZtAGAFq3bBfvUxhhTppWHRNIUSPBZT3Rl/yUi/UQkTkTi9uzZc0wX0WoNmFu9Bye0DfrDjjHGlGkVorFdVYcCQwFiY2P1WM7RofNF0PmioMZljDHlQXl4ItkONPdZb+bKjDHGlIDykEgWAm1FpLWIxAB9gAlhjskYYyqMMl+1paq5IvIIMAWIBIar6qowh2WMMRVGmU8kAKo6EZgY7jiMMaYiKg9VW8YYY8LIEokxxpiAWCIxxhgTEEskxhhjAiKqx/R+XpklInuArcd4eH1gbxDDCafyci92H6WL3UfpE6x7aamqhU7EVOESSSBEJE5VY8MdRzCUl3ux+yhd7D5Kn5K4F6vaMsYYExBLJMYYYwJiiaR4hoY7gCAqL/di91G62H2UPiG/F2sjMcYYExB7IjHGGBMQSyTGGGMCYonETyJyuYisE5F4Eekf7ngKiMgWEVkhIktFJM6V1RWRqSKywf2s48pFRN5397BcRM7wOU9ft/8GEenrU36mO3+8O1aCFPdwEUkSkZU+ZSGP+0jXCPJ9DBSR7e47WSoivXy2veBiWiciPXzKC/39ctMjzHflY91UCYhIJbce77a3CvA+movIDBFZLSKrRORxV16mvpOj3EdZ/E4qi8gCEVnm7uXlY71+sO7xiFTVPkV88Ian3wgcD8QAy4AO4Y7LxbYFqH9Y2b+A/m65P/CGW+4FTAIE6AbMd+V1gU3uZx23XMdtW+D2FXdszyDFfR5wBrCyJOM+0jWCfB8DgWcK2beD+92pBLR2v1ORR/v9AsYBfdzyEOBBt/wQMMQt9wHGBngfjYEz3HINYL2Lt0x9J0e5j7L4nQhQ3S1HA/Pdf79iXT+Y93jEWIPxR6G8f4CzgCk+6y8AL4Q7LhfLFv43kawDGrvlxsA6t/wRcMvh+wG3AB/5lH/kyhoDa33K/7RfEGJvxZ//AIc87iNdI8j3MZDC/2j96fcGbw6ds470++X+kOwFog7/PSw41i1Huf0kiN/N98ClZfU7KeQ+yvR3AlQFFgNdi3v9YN7jkT5WteWfpkCCz3qiKysNFPhZRBaJSD9X1khVd7rlXUAjt3yk+zhaeWIh5aFSEnEf6RrB9oir8hnuU1VT3PuoB6Sqau5h5X86l9ue5vYPmKsSOR3vX8Bl9js57D6gDH4nIhIpIkuBJGAq3hNEca8fzHsslCWSsu8cVT0D6Ak8LCLn+W5U758UZa6Pd0nEHcJrDAbaAJ2AncBbIbhGSIhIdeBr4AlV3e+7rSx9J4XcR5n8TlQ1T1U7Ac2ALkD7MIdUKEsk/tkONPdZb+bKwk5Vt7ufScC3eL9su0WkMYD7meR2P9J9HK28WSHloVIScR/pGkGjqrvdH4B8YBjed3Is97EPqC0iUYeV/+lcbnstt/8xE5FovD++I1X1G1dc5r6Twu6jrH4nBVQ1FZiBV81U3OsH8x4LZYnEPwuBtq4nQwxeQ9aEMMeEiFQTkRoFy8BlwEq82Ap6y/TFqyfGld/petx0A9JclcIU4DIRqeMe+S/DqxPdCewXkW6uh82dPucKhZKI+0jXCJqCP4rOtXjfScG1+7jeNa2BtngN0IX+frl/nc8AbigkXt/7uAH4xe1/rDEL8AmwRlXf9tlUpr6TI91HGf1OGohIbbdcBa+tZ80xXD+Y91i4YDVqlfcPXi+V9Xh1lH8LdzwupuPxelosA1YVxIVXxzkd2ABMA+q6cgE+cPewAoj1Odc9QLz73O1THov3P91GYBDBazwcjVfFkINXB3tvScR9pGsE+T6+cHEud/8TN/bZ/28upnX49IA70u+X+44XuPv7Cqjkyiu79Xi3/fgA7+McvCql5cBS9+lV1r6To9xHWfxOTgOWuJhXAv93rNcP1j0e6WNDpBhjjAmIVW0ZY4wJiCUSY4wxAbFEYowxJiCWSIwxxgTEEokxxpiAWCIxZZaIqIi85bP+jIgMLOY5PhNvVNhKbr2+iGwJUnwXiMiPwThXEddp4EZqXSIi5x627QkRqerHOTJCF6Ep7yyRmLIsC7hOROoHeJ48vHcfShURifRz14uBFap6uqrOOWzbE3gD/hkTMpZITFmWizcf9ZMBnudd4EmfISGA/32iEJFBInKXW94iIq+JmwdGRM4QkSkislFE/uJzmpoi8pN4cz4MEZEId/xlIvK7iCwWka/c2FAF531DRBYDNx4WTysR+UW8gQeni0gLEemENwx7bxdLFZ/9HwOaADNEZIYru0W8OUFWisgbh/+HcE9kv4vIFW79WRFZ6K75sk8ca0RkmHjzZPxccF0ReUy8uUCWi8iYY/s6TFljicSUdR8At4lIrQDOsQ34FbijuMepN6DeHOAzvCElugEv++zTBXgUb06INvzxBPUicIl6A27GAU/5HLNPVc9Q1cP/EP8HGKGqpwEjgfdVdSnwf3hzT3RS1YMFO6vq+8AO4EJVvVBEmgBvABfhDV7YWUSuKdhfRBoBP+G9Qf2TiFyGN5xGF7f/mfLHoKBtgQ9U9WQgFbjelfcHTncx+iZUU45FFb2LMaWXqu4Xkc+Bx4CDRe1/FK/hjSf0UzGOKRhvbQXeBETpQLqIZBWMkQQsUNVNACIyGm8Ij0N4iWWuNzQUMcDvPucde4TrnQVc55a/wHsSKY7OwExV3ePiGYk3Mdd3eBMnTQceVtVZbv/L3GeJW6+Ol0C2AZtdEgNYhDcnC3jDeYwUke/ceU0FYInElAfv4k3682lhG0VkCt4cF3Gqel9h+6jqBvHmfbjJpziXPz+1Vz7ssCz3M99nuWC94P+tw8cgUrxxqqaq6i2F3g0cOEJ5KOXiJYQeQEEiEeA1Vf3Id0fx5vnwvd88oKBK7Qq85HQV8DcROVX/mNfClFNWtWXKPFVNxpsa9N4jbO/hqn0KTSI+/gE847O+FejgRk2tjdeoXVxd3OiqEcDNeFVo84DuInIC/HcU53Z+nOs3vBFaAW7Dq1IrSjrelLPgDcJ3vmsHicSbpbAgaSheh4P2IvK8K5sC3OPTftNURBoe6ULuHpur6gzgebxhzKv7EaMp4+yJxJQXbwGPBHICVV3lGrnPcOsJIjIOb+TVzfxRxVMcC/FGuj0Bb2jub1U13zXajy7odozXZrK+iHM9CnwqIs8Ce4C7/bj+UGCyiOxw7ST9XRwC/KSq/x0eXFXzROQWYIKIpKvqhyJyEvC7q4LLAG7HewIpTCTwpWuvErw2nFQ/YjRlnI3+a4wxJiBWtWWMMSYglkiMMcYExBKJMcaYgFgiMcYYExBLJMYYYwJiicQYY0xALJEYY4wJyP8D0Ts/I8W7IIQAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plt.plot(N, V)\n",
"\n",
"k = 10.34\n",
"beta = 0.64\n",
"plt.plot(N, k * (N**beta))\n",
"plt.xlabel('N - Number of tokens')\n",
"plt.ylabel('|V| - Size of vocabulary')\n",
"plt.title('Heaps\\' Law')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
"import pandas as pd\n",
"import re\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"from bs4 import BeautifulSoup"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"data = pd.read_csv('tweets-dataset.csv')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sentence</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <t{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# ASSIGNMENT 1\n",
"NLP\n",
"Akhilesh Ravi\n",
"16110007"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import re\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"from bs4 import BeautifulSoup"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"data = pd.read_csv('tweets-dataset.csv')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sentence</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>@BubblyDentist @MeetUunngLee nahi nahi, mere s...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>going to the grammys first entertainment law b...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>bohot hi badiya ji aap sunao?</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Parvez Musharraf is Digvijay Singh of Pakistan...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>guddu ko bass john cena k sticker ki padii hai...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Aman ki maa ki... Asha https://twitter.com/ash...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>pakistan can wait more more and more . . . ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>@sagarcasm Jai Mahesh !!</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>RT AAP_Najafgarh \" RT AAPInNews : When DDCA lo...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>\"Kaam ho jayega, thoda kharcha paani lagega\" \\...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Sentence\n",
"0 @BubblyDentist @MeetUunngLee nahi nahi, mere s...\n",
"1 going to the grammys first entertainment law b...\n",
"2 bohot hi badiya ji aap sunao?\n",
"3 Parvez Musharraf is Digvijay Singh of Pakistan...\n",
"4 guddu ko bass john cena k sticker ki padii hai...\n",
"5 Aman ki maa ki... Asha https://twitter.com/ash...\n",
"6 pakistan can wait more more and more . . . ...\n",
"7 @sagarcasm Jai Mahesh !!\n",
"8 RT AAP_Najafgarh \" RT AAPInNews : When DDCA lo...\n",
"9 \"Kaam ho jayega, thoda kharcha paani lagega\" \\..."
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[:10]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# s = re.sub('[^0-9a-zA-Z]+', '*', s)\n",
"# https://stackoverflow.com/questions/12985456/replace-all-non-alphanumeric-characters-in-a-string"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def cleanText(raw_text):\n",
" '''\n",
" Convert a raw review to a cleaned review\n",
" '''\n",
" text = BeautifulSoup(raw_text, 'html').get_text() #remove html\n",
" words = text.split()\n",
" words = [w for w in words if '@' not in w and '#' not in w] # remove the @-words and #-words\n",
" text = ' '.join(words)\n",
" letters_only = re.sub('[^a-zA-Z]+', ' ', text) # remove non-character\n",
" \n",
" return( letters_only.lower())\n",
"\n",
"vclean = np.vectorize(cleanText)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sentence</th>\n",
" <th>Cleaned sentence</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>@BubblyDentist @MeetUunngLee nahi nahi, mere s...</td>\n",
" <td>nahi nahi mere saath jaakar pachtaogi ye uunng...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>going to the grammys first entertainment law b...</td>\n",
" <td>going to the grammys first entertainment law b...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>bohot hi badiya ji aap sunao?</td>\n",
" <td>bohot hi badiya ji aap sunao</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Parvez Musharraf is Digvijay Singh of Pakistan...</td>\n",
" <td>parvez musharraf is digvijay singh of pakistan...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>guddu ko bass john cena k sticker ki padii hai...</td>\n",
" <td>guddu ko bass john cena k sticker ki padii hai...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Aman ki maa ki... Asha https://twitter.com/ash...</td>\n",
" <td>aman ki maa ki asha https twitter com ashabhos...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>pakistan can wait more more and more . . . ...</td>\n",
" <td>pakistan can wait more more and more aakhir pa...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>@sagarcasm Jai Mahesh !!</td>\n",
" <td>jai mahesh</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>RT AAP_Najafgarh \" RT AAPInNews : When DDCA lo...</td>\n",
" <td>rt aap najafgarh rt aapinnews when ddca lowere...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>\"Kaam ho jayega, thoda kharcha paani lagega\" \\...</td>\n",
" <td>kaam ho jayega thoda kharcha paani lagega sir...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Sentence \\\n",
"0 @BubblyDentist @MeetUunngLee nahi nahi, mere s... \n",
"1 going to the grammys first entertainment law b... \n",
"2 bohot hi badiya ji aap sunao? \n",
"3 Parvez Musharraf is Digvijay Singh of Pakistan... \n",
"4 guddu ko bass john cena k sticker ki padii hai... \n",
"5 Aman ki maa ki... Asha https://twitter.com/ash... \n",
"6 pakistan can wait more more and more . . . ... \n",
"7 @sagarcasm Jai Mahesh !! \n",
"8 RT AAP_Najafgarh \" RT AAPInNews : When DDCA lo... \n",
"9 \"Kaam ho jayega, thoda kharcha paani lagega\" \\... \n",
"\n",
" Cleaned sentence \n",
"0 nahi nahi mere saath jaakar pachtaogi ye uunng... \n",
"1 going to the grammys first entertainment law b... \n",
"2 bohot hi badiya ji aap sunao \n",
"3 parvez musharraf is digvijay singh of pakistan... \n",
"4 guddu ko bass john cena k sticker ki padii hai... \n",
"5 aman ki maa ki asha https twitter com ashabhos... \n",
"6 pakistan can wait more more and more aakhir pa... \n",
"7 jai mahesh \n",
"8 rt aap najafgarh rt aapinnews when ddca lowere... \n",
"9 kaam ho jayega thoda kharcha paani lagega sir... "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['Cleaned sentence'] = vclean(data['Sentence'])\n",
"data[:10]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 nahi nahi mere saath jaakar pachtaogi ye uunng...\n",
"1 going to the grammys first entertainment law b...\n",
"2 bohot hi badiya ji aap sunao \n",
"3 parvez musharraf is digvijay singh of pakistan...\n",
"4 guddu ko bass john cena k sticker ki padii hai...\n",
"5 aman ki maa ki asha https twitter com ashabhos...\n",
"6 pakistan can wait more more and more aakhir pa...\n",
"7 jai mahesh \n",
"8 rt aap najafgarh rt aapinnews when ddca lowere...\n",
"9 kaam ho jayega thoda kharcha paani lagega sir...\n",
"Name: Cleaned sentence, dtype: object"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[:10]['Cleaned sentence']"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"def tokenize(s):\n",
" return tuple(s.split())"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of tokens: 303161\n",
"Number of word types: 32707\n"
]
}
],
"source": [
"tokens = []\n",
"for i in range(len(data)):\n",
" tokens.extend( tokenize( data.iloc[i]['Cleaned sentence']) )\n",
"sorted_tokens = sorted(tokens)\n",
"word_types = list(set(tokens))\n",
"print('Number of tokens:', len(tokens))\n",
"print('Number of word types:', len(word_types))"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TTR: 0.10788656852299604\n"
]
}
],
"source": [
"print('TTR:', len(word_types)/len(tokens))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 2. Zipf's Law"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"token_count = {}\n",
"for s in sorted_tokens:\n",
" if s in token_count:\n",
" token_count[s] += 1\n",
" else:\n",
" token_count[s] = 1"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Top 50 Word types in decreasing order of number of occurences:\n",
"\n"
]
},
{
"data": {
"text/plain": [
"[('hai', 10030),\n",
" ('to', 4154),\n",
" ('ki', 3224),\n",
" ('ke', 3170),\n",
" ('nahi', 3169),\n",
" ('bhi', 2929),\n",
" ('the', 2866),\n",
" ('se', 2601),\n",
" ('ho', 2365),\n",
" ('ka', 2310),\n",
" ('bhai', 2266),\n",
" ('ko', 2208),\n",
" ('me', 1955),\n",
" ('ye', 1869),\n",
" ('kya', 1815),\n",
" ('hi', 1801),\n",
" ('aur', 1797),\n",
" ('twitter', 1760),\n",
" ('com', 1724),\n",
" ('kar', 1681),\n",
" ('i', 1509),\n",
" ('in', 1387),\n",
" ('t', 1319),\n",
" ('https', 1310),\n",
" ('is', 1296),\n",
" ('mein', 1276),\n",
" ('a', 1202),\n",
" ('ek', 1165),\n",
" ('and', 1126),\n",
" ('status', 1108),\n",
" ('of', 1074),\n",
" ('on', 1071),\n",
" ('na', 1026),\n",
" ('s', 1009),\n",
" ('ab', 969),\n",
" ('toh', 963),\n",
" ('rt', 944),\n",
" ('tha', 937),\n",
" ('http', 905),\n",
" ('for', 885),\n",
" ('you', 885),\n",
" ('aaj', 873),\n",
" ('co', 872),\n",
" ('raha', 868),\n",
" ('par', 826),\n",
" ('ne', 824),\n",
" ('aap', 820),\n",
" ('hain', 816),\n",
" ('koi', 802),\n",
" ('kuch', 801)]"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sorted_token_count = list(token_count.items())\n",
"sorted_token_count.sort(key=lambda x:x[1], reverse=True)\n",
"print('Top 50 Word types in decreasing order of number of occurences:\\n')\n",
"sorted_token_count[:50]"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0.5, 1.0, \"Zipf's Law\")"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZIAAAEWCAYAAABMoxE0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3deZhcdZ3v8fcnTQeazWbpyUAHSMTcMCBCsGURFxYliCJ5GFBcxoA43OfKdZ8oudd5wqKCExVQR53IrgyrERHQGAGRywyBxLAFCGRQSJpAIiQgmsEs3/vH+RWp7nR3ne7T1dWn+/N6nnqqzq/OqfM9RZFv/85vU0RgZmY2UGMaHYCZmZWbE4mZmRXiRGJmZoU4kZiZWSFOJGZmVogTiZmZFeJEYjYAkn4haXrOfcdJ+q2kP0n6Zr1jMxtqWzU6ALPhRtJHgH/r4a3tgFkRcW5EvKcfH3kG8Edgx4gISWcDRMTZ/YwrgEkRsaw/x5nVm2skZt1ExNURsX31A/gs8DzwwwF85F7Ao+HRvzZCOZGY1SBpCnARcEpErExlv5H0ifT6VEn3SPqupJckPS7p6PTeFcB04IuSXpH0rm6fvaukWyStlfSipLsl9ev/S0l7S7pD0guS/ijpakmt6b3TJP28at8nJd1Qtb1c0oED+mLMEt/aMutD+gf5RuC8iPhNH7sekvbbFTgRmCtpYkScKglgRUR8Oe3766rjvgCsANrS9qFAf2suAs4HfgvsCPwEOJusFnUXcGFKTn8LjAUOS9f2emB74KF+ns+sC9dIzHqhLANcBTwC/EuN3VcBF0XE+oi4DlgKvDfHadYDuwF7pWPv7u8tsIhYFhHzI+LViFgNfAt4Z3rvKeBPwIHAO4B5wLOS9kn73B0Rm/pzPrPuXCMx692XgP2AN+f4x72z2z5PA7vnOMdsstrDr1LNZU5EXNCfICWNAy4G3g7sQPYH4pqqXe4CjgDekF6vJUsih6Vts0JcIzHrgaQjgP8LnBQRa3Mc0p5qMBV7As/WOigi/hQRX4iI1wPvBz5faV/ph6+R3Q7bPyJ2BD5KdruropJI3p5e30WWSN6JE4kNAicSs24k7QZcC3w2IhbnPOxvgE9LapZ0MvB3wG05zvU+SW9ISeglYCPQ162msZK2qXo0kdVCXgFektQOzOh2zF3AkUBLRKwA7gaOBXYB8l6fWa+cSMy29I/AOODi1NOq+vGDXo5ZAEwiGy/yVbKazAs5zjWJrPH9FeA/ge9FxJ197L8EWFf1OA04BziILBHdCsytPiAinkiff3fafhl4CrgnIjbmiNGsT3LXdrNiJJ0KfCIi3tboWMwawTUSMzMrxInEzMwK8a0tMzMrxDUSMzMrZNQNSNx1111jwoQJjQ7DzKxUFi1a9MeIaOvpvVGXSCZMmMDChQsbHYaZWalIerq393xry8zMCnEiMTOzQpxIzMysECcSMzMrxInEzMwKGXW9turppsWdzJ63lGfXrmP31hZmTJ3MtCntjQ7LzKyu6lojkXSZpFWSHqkq21nS/LR29HxJO6VySfq2pGWSHpJ0UNUx09P+T0qaXlX+ZkkPp2O+3W09iCF10+JOZs59mM616wigc+06Zs59mJsWdzYqJDOzIVHvW1tXkK17UO0s4PaImATcnrYB3kM2pfYk4Azg+5AlHmAW2ZrYBwOzKskn7fOPVcd1P9eQmT1vKevWd52Re936jcyet7RBEZmZDY26JpKI+C3wYrfiE4Ar0+srgWlV5VdF5l6gNS0wNBWYHxEvRsQaYD5wbHpvx4i4Ny1xelXVZw25Z9eu61e5mdlI0YjG9nERsTK9fo5sASGAdmB51X4rUllf5St6KN+CpDMkLZS0cPXq1cWvoAe7t7b0q9zMbKRoaK+tVJOo+/TDETEnIjoioqOtrcepYgqbMXUyLc1NXcpampuYMXVyXc5nZjZcNCKRPJ9uS1XWxl6VyjuBPar2G5/K+iof30N5Q0yb0s75J+5Pe2sLAtpbWzj/xP3da8vMRrxGdP+9GZgOXJCef1ZV/r8lXUvWsP5SRKyUNA/4WlUD+zHAzIh4UdLLkg4lWy/7Y8B3hvJCups2pd2Jw8xGnbomEknXAEcAu0paQdb76gLgekmnA08DH0i73wYcBywD/gKcBpASxnnA/Wm/cyOi0oD/SbKeYS3AL9LDzMyG0KhbIbGjoyM8jbyZWf9IWhQRHT295ylSzMysECcSMzMrxInEzMwKcSIxM7NCnEjMzKwQJxIzMyvEicTMzApxIjEzs0KcSMzMrBAnEjMzK8SJxMzMCnEiMTOzQpxIzMysECcSMzMrxInEzMwKcSIxM7NCnEjMzKwQJxIzMyvEicTMzApxIjEzs0KcSMzMrBAnEjMzK8SJxMzMCnEiMTOzQpxIzMysECcSMzMrxInEzMwKcSIxM7NCnEjMzKwQJxIzMyukYYlE0uckLZH0iKRrJG0jaaKkBZKWSbpO0ti079Zpe1l6f0LV58xM5UslTW3U9ZiZjVYNSSSS2oFPAx0R8UagCTgF+DpwYUS8AVgDnJ4OOR1Yk8ovTPshad903H7AscD3JDUN5bWYmY12jby1tRXQImkrYFtgJXAUcGN6/0pgWnp9QtomvX+0JKXyayPi1Yj4PbAMOHiI4jczMxqUSCKiE/gG8AxZAnkJWASsjYgNabcVQHt63Q4sT8duSPvvUl3ewzGvkXSGpIWSFq5evXrwL8jMbBRr1K2tnchqExOB3YHtyG5N1UVEzImIjojoaGtrq9dpzMxGpUbd2noX8PuIWB0R64G5wOFAa7rVBTAe6EyvO4E9ANL7rwNeqC7v4RgzMxsCjUokzwCHSto2tXUcDTwK3AmclPaZDvwsvb45bZPevyMiIpWfknp1TQQmAfcN0TWYmRlZg/eQi4gFkm4EfgdsABYDc4BbgWslfSWVXZoOuRT4kaRlwItkPbWIiCWSridLQhuAMyNi45BejJnZKKfsD/vRo6OjIxYuXNjoMMzMSkXSoojo6Ok9j2w3M7NCnEjMzKwQJxIzMyvEicTMzApxIjEzs0KcSMzMrBAnEjMzK8SJxMzMCnEiMTOzQpxIzMysECcSMzMrxInEzMwKcSIxM7NCnEjMzKyQXOuRSDocOBvYKx0jICLi9fULzczMyiDvwlaXAp8DFgFeOMrMzF6TN5G8FBG/qGskZmZWSnkTyZ2SZgNzgVcrhRHxu7pEZWZmpZE3kRySnquXWQzgqMENx8zMyiZXIomII+sdiJmZlVOu7r+Sxkm6VNIv0va+kk6vb2hmZlYGeceRXAHMA3ZP208An61HQGZmVi55E8muEXE9sAkgIjbgbsBmZkb+RPJnSbuQNbAj6VDgpbpFZWZmpZG319bngZuBvSXdA7QBJ9UtKjMzK428iWQx8E5gMtn0KEvxPF1mZkb+ZHBpRGyIiCUR8QgwFritjnGZmVlJ5E0knZK+ByBpJ2A+8OO6RWVmZqWRK5FExD8Dr0j6AfAr4JsRcXldIzMzs1Los41E0olVmwuAfwbuA0LSiRExt57BmZnZ8Fersf34btuLgeZUHmSTOJqZ2SjWZyKJiNPqdWJJrcAlwBvJktLHyXqDXQdMAP4AfCAi1kgScDFwHPAX4NTKzMOSpgNfTh/7lYi4sl4xm5nZlvLOtTVe0k8lrUqPn0gaX/DcFwO/jIh9gAOAx4CzgNsjYhJwe9oGeA8wKT3OAL6f4toZmEU2O/HBwKzUGcDMzIZI3l5bl5MNSNw9PX6eygZE0uuAd5CtvEhE/DUi1gInAJUaxZXAtPT6BOCqyNwLtEraDZgKzI+IFyNiDVlvsmMHGpeZmfVf3kTSFhGXp7EkGyLiCrLR7QM1EVgNXC5psaRLJG0HjIuIlWmf54Bx6XU7sLzq+BWprLfyLiSdIWmhpIWrV68uELaZmXWXN5G8IOmjkprS46PACwXOuxVwEPD9iJgC/JnNt7EAiIggze1VVETMiYiOiOhoayuS/8zMrLu8ieTjwAfIagkryebZOrXAeVcAKyJiQdq+kSyxPJ9uWZGeV6X3O4E9qo4fn8p6KzczsyGSN5GMj4j3R0RbRPxNREwD9hzoSSPiOWC5pMmp6GjgUbJ2mOmpbDrws/T6ZuBjyhwKvJRugc0DjpG0U2pkPyaVmZnZEMk7aeN3yGoMtcr641PA1ZLGAk8Bp5EltuvT6otPk9WCIJvX6zhgGVn339MAIuJFSecB96f9zo2IFwvEZGZm/VRrZPthwFuBNkmfr3prR6CpyIkj4gGgo4e3ju5h3wDO7OVzLgMuKxKLmZkNXK0ayVhg+7TfDlXlL+P1SMzMjNoj2+8C7pJ0RUQ8PUQxmZlZieSd/ddJxMzMeuRVDs3MrJA+E4mkr6fnk4cmHDMzK5taNZLj0sy7M4ciGDMzK59avbZ+CawBtpf0MiCyaUtE1it3xzrHZ2Zmw1yfNZKImBERrcCtEbFjROxQ/TxEMZqZ2TCWa2R7RJwgaRzwllS0ICI8ja6ZmeVe2OpksrXaTyabtuQ+SR6QaGZmuefa+jLwlohYBSCpDfg12ay9ZmY2iuUdRzKmkkSSF/pxrJmZjWB5ayS/lDQPuCZtf5BsRl4zMxvl8ja2z5B0IvC2VDQnIn5av7DMzKws8tZIiIi5wNw6xmJmZiXkdg4zMyvEicTMzArJO47kzT2UvW/wwzEzs7LJWyP5oaQ3VjYkfQj45/qEZGZmZZK3sf0k4EZJHwbeDnwMOKZuUZmZWWnk7f77lKRTgJuAZ4BjImJdXSMzM7NS6DORSHqYbNr4ip2BJmCBJCLiTfUMzszMhr9aNRI3qJuZWZ/6TCQR8XTltaQmYFytY8zMbHTJlRQkfQqYBTwPbErFAfjWlpnZKJe3dvEZYHJEvFDPYMzMrHzyjiNZDrxUz0DMzKyc8tZIngJ+I+lW4NVKYUR8qy5RlcBNizuZPW8pz65dx+6tLcyYOplpU9obHZaZ2ZDLm0ieSY+x6TGq3bS4k5lzH2bd+o0AdK5dx8y5DwM4mZjZqJN3QOI59Q6kTGbPW/paEqlYt34js+ctdSIxs1En76SNbZJmS7pN0h2VR9GTS2qStFjSLWl7oqQFkpZJuk7S2FS+ddpelt6fUPUZM1P5UklTi8aUx7Nrex7U31u5mdlIlrex/WrgcWAicA7wB+D+QTj/Z4DHqra/DlwYEW8A1gCnp/LTgTWp/MK0H5L2BU4B9gOOBb6XxrvU1e6tLf0qNzMbyfImkl0i4lJgfUTcFREfB44qcmJJ44H3ApekbaXPvDHtciUwLb0+IW2T3j867X8CcG1EvBoRvweWAQcXiSuPGVMn09LcNV+1NDcxY+rkep/azGzYydvYvj49r5T0XuBZsnm3irgI+CKwQ9reBVgbERvS9gqg0uDQTtYFmYjYIOmltH87cG/VZ1Yf8xpJZwBnAOy5554Fw97coO5eW2Zm+RPJVyS9DvgC8B1gR+BzAz1pWhRrVUQsknTEQD8nr4iYA8wB6OjoiBq75zJtSrsTh5kZORJJanOYFBG3kA1KPHIQzns48H5JxwHbkCWmi4FWSVulWsl4oDPt3wnsAayQtBXwOuCFqvKK6mPMzGwI1GwjiYiNwIcG86QRMTMixkfEBLLG8jsi4iPAnWSLaAFMB36WXt+ctknv3xERkcpPSb26JgKTgPsGM1YzM+tb3ltb90j6LnAd8OdKYUT8bpDj+RJwraSvAIuBS1P5pcCPJC0DXiRLPkTEEknXA48CG4AzU+IzM7MhouwP+xo7SXf2UBwRUajnViN0dHTEwoULGx2GmVmpSFoUER09vZd3ZPtgtIuYmdkIlHcciZmZWY+cSMzMrBAnEjMzK6TfiUTSnHoEYmZm5TSQGkmPrfZmZjY65R1HUm3VoEdRQl4h0cws0+9EEhHH1iOQMrlpcSczbniQ9ZuyMTida9cx44YHgS1XSHTCMbORzo3tA3D2zUteSyIV6zcFZ9+8pEtZZUnezrXrCDYvyXvTYk8HZmYjhxPJAKxdtz5XeV9L8pqZjRQ1E0laDnfAU8aPZl6S18xGg4bM/lt2O23bnKvcS/Ka2Wgw3Gb/LYVZx+/HjBsfZP3Gze0kzU3ivW/ajcMvuOO1hvUj92njJ4s6u9ze8pK8ZjbS5E0kB6bnc6vKgoLrtpdVT0vtHrlPG9fdt7xLT67r7lvOBw/egzsfX+1eW2Y2YuWaRn4kqdc08gee86seG+FbW5p5YNYxg34+M7OhNOBp5CXtmfMcayPi5X5HNoLk7cllZjbS1Lq1dSXZLSz1sU8AVwBXDVJMZmZWIn0mEi9old9O2zaz5i9b1j566+FlZjZSeEDiIJl1/H40N3WtuDU3iVnH79egiMzMhoYTyQDdtLiTwy+4g4ln3crhF9wBwOyTDqC9tQUB7a0tzD7pAPfQMrMRbyCz/456lTm0KuNDKnNonX/i/txz1qjsEW1mo5hrJAPgObTMzDZzIhkAz6FlZraZE8kAeA4tM7PNnEgG4Mh92vpVbmY2kjmRDMAtD67MXd69d5cXtTKzkca9tgYg73QovfXugi2X5DUzKyvXSOrIvbvMbDRwIhmAMb3MPNa93L27zGw0cCIZgE29zLzfvdy9u8xsNGhIIpG0h6Q7JT0qaYmkz6TynSXNl/Rket4plUvStyUtk/SQpIOqPmt62v9JSdOHIv72XhJB9/IZUyfT0tzUpcwrJJrZSNOoGskG4AsRsS9wKHCmpH2Bs4DbI2IScHvaBngPMCk9zgC+D1niAWYBhwAHA7MqyaeeZkydTHO3+1jNY7RFgpg2pZ3zT9y/y/xb55+4vxvazWxEaUivrYhYCaxMr/8k6TGgHTgBOCLtdiXwG+BLqfyqyJZzvFdSq6Td0r7zI+JFAEnzgWOBa+p+Ed3bSXppN5k2pd2Jw8xGtIa3kUiaAEwBFgDjUpIBeA4Yl163A8urDluRynorr6vZ85ayfmPXBpH1G8O9scxsVGroOBJJ2wM/AT4bES9Lm/+sj4iQNCgLyks6g+yWGHvumXf14N519tLrqnPtOg6/4A6eXbuO3VtbmDF1smsjZjbiNaxGIqmZLIlcHRFzU/Hz6ZYV6XlVKu8E9qg6fHwq6628i4iYExEdEdHR1lZ8GpO+1h3uXLuOSM8zbnhwwCPZPSLezMqiUb22BFwKPBYR36p662ag0vNqOvCzqvKPpd5bhwIvpVtg84BjJO2UGtmPSWV1lbeatH5TcPbNS/r9+ZUR8dVJaebch51MzGxYalSN5HDgH4CjJD2QHscBFwDvlvQk8K60DXAb8BSwDPgh8EmA1Mh+HnB/epxbaXgfLnqbTqUvHhFvZmXSqF5b/4/e7xAd3cP+AZzZy2ddBlw2eNHVttO2zaz5S/8TRF4eEW9mZdLwXltlNOv4/Whu6qulpBiPiDezMnEiyaF7wzfA7JMO6DLQcDB5RLyZlYmnka+ht6ngzz9xf+4566jX9pty7q96vN2107bN/T5npcvw7HlL3ZXYzIY9J5Ia+mr4rv6Hfdbx+zHjxge7DFRsbhKzjt9vQOf1iHgzKwvf2qqhtwbuyuDDvm53zT7pACcDMxvxXCOpYffWlj5HsleeZ9zwILNPPqDL7a7e3LS407etzGzEcCKp4ch92vjxvc/U3K8y+LB7QuieNI7cp42fLOr08rtmNmL41lYNdz6+Ove+va3ZXj1C/ep7n/FgQzMbUZxIaujttlYePTXU9za9igcbmllZ+dZWDU0SGyPf7FrNY+gy+29/ktAYiYln3eo2EzMrHddIasibRADWb+o6+29vY997Kt8Y4QkazayUnEhqKDJqPdgyabQ0N/GRQ/d8rZtwk7ZMK24zMbMy8a2tGvL22upNkCWj3rr6Tjzr1h6Pc5uJmZWFayQ19KfXVk+aa3zDnqDRzMrOiaSGojWD7u0m3VdN9ASNZlZ2TiQ1DHbNoPuqidOmtHP+ift3mVrl/BP3d68tMysNt5HUMGPqZGbc8CDrN+XvvVVL94GLnqDRzMrMNZIcBjOJmJmNNK6R1DBz7kN1+dzqgYsegGhmZeZEUsO69Zvq8rnVMwd70kYzKzPf2hoGPADRzMrMiWSY8ABEMysrJ5JhwgMQzays3EZSQ3s/Z/EdiDGQawCiV1Y0s+HINZIaJuxS/5rCJmDh0y/2uU9Pi2R5lmAzGw5cI6nh3qfWDMl5fnzvM1x97zOv1TSALrWPP7+6odeVFV0rMbNGciKpoT/rkRT12nxcNz4IsXkgZF+31txIb2aN5kRSQ39WSBws6zfmP58b6c2s0ZxIajj09Ttxz3/13X7RSIPZhuPGfDMbCCeSGoZzEgH4j0GKr9KYX2mH8Yh7M8tLMcS3bRqto6MjFi5cmHv/Cb2sYDicjdthLM//6a9dtmcet2+X2saR+7Rx5+OruzTmd5+VGKC1pZnttt6qSy0FcM3FbJSRtCgiOnp8r+yJRNKxwMVAE3BJRFzQ1/6jIZGUxbgdxvLu/f6WaxYsZ2METRIfOmQPbl7cycuvbu6htuPWTew//nVdaoeH770zE9u23+LYjr12rmuS6+n2H5QnsZY9/kYYCbd8B+MaRmwikdQEPAG8G1gB3A98KCIe7e0YJ5KRbYygetb/luamQVsorPvtP4DmMQJ17SAxmOccTGWPvxF6+s7K9v0M1jX0lUjKPiDxYGBZRDwVEX8FrgVOaHBM1kDdl44ZzAkxZ89busVYnvWbYotedsN1Es6yx98IPX1nZft+huIayp5I2oHlVdsrUlkXks6QtFDSwtWrVw9ZcDY8DNZYm/58znAc31P2+Buht++hTN/PUFxD2RNJLhExJyI6IqKjra2tX8fuuHVTnaKyoTJYY2368znDcXxP2eNvhN6+hzJ9P0NxDWVPJJ3AHlXb41PZoHnonGOdTEpkjLputzQ35ZoQM48ZUyfT0tz1t9A8RjQ3dT3pYJ5zMJU9/kbo6Tsr2/czFNdQ9nEk9wOTJE0kSyCnAB8e7JM8dM6xW5S5Eb64svXaqnxOWXs9lT3+RujtOyvT9zMU11DqXlsAko4DLiLr/ntZRHy1r/3722vLzMz67rVV9hoJEXEbcFuj4zAzG63K3kZiZmYN5kRiZmaFOJGYmVkhTiRmZlZI6Xtt9Zek1cDTOXffFfhjHcOpN8ffWGWPH8p/DY5/8OwVET2O6B51iaQ/JC3srbtbGTj+xip7/FD+a3D8Q8O3tszMrBAnEjMzK8SJpG9zGh1AQY6/scoeP5T/Ghz/EHAbiZmZFeIaiZmZFeJEYmZmhTiR9EDSsZKWSlom6axGx5OHpMskrZL0SFXZzpLmS3oyPe/UyBj7ImkPSXdKelTSEkmfSeWluAZJ20i6T9KDKf5zUvlESQvSb+k6SWMbHWtfJDVJWizplrRdmvgl/UHSw5IekLQwlZXi91MhqVXSjZIel/SYpMPKcA1OJN1IagL+FXgPsC/wIUn7NjaqXK4Aui+cchZwe0RMAm5P28PVBuALEbEvcChwZvrey3INrwJHRcQBwIHAsZIOBb4OXBgRbwDWAKc3MMY8PgM8VrVdtviPjIgDq8ZelOX3U3Ex8MuI2Ac4gOy/xfC/hojwo+oBHAbMq9qeCcxsdFw5Y58APFK1vRTYLb3eDVja6Bj7cS0/A95dxmsAtgV+BxxCNip5q1Te5bc13B5kK4zeDhwF3AKoZPH/Adi1W1lpfj/A64DfkzpBlekaXCPZUjuwvGp7RSoro3ERsTK9fg4Y18hg8pI0AZgCLKBE15BuCz0ArALmA/8FrI2IDWmX4f5bugj4IrApbe9CueIP4FeSFkk6I5WV5vcDTARWA5en24uXSNqOElyDE8koEdmfM8O+r7ek7YGfAJ+NiJer3xvu1xARGyPiQLK/7A8G9mlwSLlJeh+wKiIWNTqWAt4WEQeR3ZY+U9I7qt8c7r8fsoUGDwK+HxFTgD/T7TbWcL0GJ5ItdQJ7VG2PT2Vl9Lyk3QDS86oGx9MnSc1kSeTqiJibikt1DQARsRa4k+xWUKukykqkw/m3dDjwfkl/AK4lu711MeWJn4joTM+rgJ+SJfMy/X5WACsiYkHavpEssQz7a3Ai2dL9wKTUW2UscApwc4NjGqibgenp9XSydodhSZKAS4HHIuJbVW+V4hoktUlqTa9byNp3HiNLKCel3YZt/BExMyLGR8QEst/8HRHxEUoSv6TtJO1QeQ0cAzxCSX4/ABHxHLBc0uRUdDTwKCW4Bo9s74Gk48juFzcBl0XEVxscUk2SrgGOIJt2+nlgFnATcD2wJ9nU+R+IiBcbFWNfJL0NuBt4mM336P8PWTvJsL8GSW8CriT7zYwBro+IcyW9nuwv/J2BxcBHI+LVxkVam6QjgH+KiPeVJf4U50/T5lbAv0fEVyXtQgl+PxWSDgQuAcYCTwGnkX5PDONrcCIxM7NCfGvLzMwKcSIxM7NCnEjMzKwQJxIzMyvEicTMzApxIrHSkfTpNDPq1f045mxJnWlm2AckXVDPGIcLSW9PsxE/kMa3VMpbJX0yx/FHVGYCNuvNVrV3MRt2Pgm8KyJW9PO4CyPiG729KakpIjYWC23Y+QhwfkT8uFt5K9n3+L2hD8lGGtdIrFQk/QB4PfALSZ8bhM/7g6SvS/odcLKkvSX9Mk38d7ekfdJ+EyX9Z1rv4iuSXknlXf5il/RdSaem12+WdFf6rHlV01z8Jp3zPklPSHp7Km+S9A1Jj0h6SNKnJB0l6aaqz3+3pJ/SjaSj00R/Dytbm2ZrSZ8APgCc10Pt7QJg71RTma3M7HTuhyV9sIdzvCWdY+80kvyydA2LJZ2Q9jlV0tz0HT4p6V+K/Pexkmj09MN++NHfBz1MF57jmLPJ5ol6ID2mVn3WF6v2ux2YlF4fQjZVCGTTVHwsvT4TeCW9PgK4per47wKnAs3AfwBtqfyDZLMkAPwG+GZ6fRzw6/T6f5HNr1SZtn1nsqncH6/6nH8Hju92bduQzVj9P9L2VWSTXkK2Ts1JPXwfE+i65MDfk81Y3EQ2u+wzZFOWH0E2pfxbgUXAnmn/r5GNcoesdvMEsF269qfIpkTfhmwk9h6N/s34Ud+Hb23ZaNLbra3r4LWZh98K3JBN/QXA1un5cLJ/bAF+RLbgU18mA28E5lkIpHAAAAInSURBVKfPagJWVr1fmZRyEdk/6gDvAn4Qadr2SNNgSPoR8FFJl5NNBPmxHs71+4h4Im1fSZbsLqoRY7W3AddEdmvveUl3AW8BXgb+DpgDHBMRz6b9jyGb5PGf0vY2ZFN4QLYI00sp9keBvei6NIONME4kNmKkf2inAM9GxHH9OPTP6XkM2fobB/ayX0/zCW2g6y3ibSrhAEsi4rBePqsyX9VGav9/eDnwc+C/gRti8/ogQ2Ul2XVNASqJRMDfR8TS6h0lHcLma4N812cl5zYSGzEi4rTIllntTxKpPv5l4PeSToZsRmJJB6S37yGbFReyBuyKp4F9U5tEK9mMrZCtatcm6bD0Wc2S9qsRwnzgfypN2y5p5xTXs2T/gH+ZLKl0txSYIOkNafsfgLtqnOtPwA5V23cDH0ztNG3AO4D70ntrgfcC56cJHQHmAZ9Sqm5JmlLjfDaCOZGYdfUR4HRJDwJLgBNS+WfIFkt6mKpVAiNiOdnMrI+k58Wp/K9k069/PX3WA2S3zfpyCVnbxEPpmA9XvXc1sDwiHut+UET8N9kssTek+DYBP+jrRBHxAnBPalyfTTZz7kPAg8AdZO1Gz1Xt/zzwPuBfU63jPLJ2oIckLUnbNkp59l+zAZD0SkRsP4Tn+y6wOCIuHapzmuXlRGI2AEOZSCQtImvHeXcMw7VAzJxIzMysELeRmJlZIU4kZmZWiBOJmZkV4kRiZmaFOJGYmVkh/x9AZvdC6btnfwAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"length = [len(item[0]) for item in sorted_token_count]\n",
"frequency = [item[1] for item in sorted_token_count]\n",
"plt.scatter(length, frequency)\n",
"\n",
"plt.xlabel('f - Frequency of token')\n",
"plt.ylabel('|r - rank of token')\n",
"plt.title('Zipf\\'s Law')"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"from random import choice"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('to', 4154),\n",
" ('the', 2866),\n",
" ('com', 1724),\n",
" ('for', 885),\n",
" ('raha', 868),\n",
" ('do', 684),\n",
" ('diya', 452),\n",
" ('my', 424),\n",
" ('india', 388),\n",
" ('time', 385)]"
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"choose = sorted_token_count[:100]\n",
"l1 = ['to', 'the', 'raha', 'diya', 'my', 'for','com', 'do', 'india','time']\n",
"lst = []\n",
"for k in l1:\n",
" item = (k, token_count[k])\n",
" lst.append(item)\n",
"lst.sort(key=lambda x:x[1], reverse=True)\n",
"lst\n"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0.5, 1.0, \"Zipf's Law\")"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEWCAYAAACXGLsWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3de5hdVX3/8feHIci0XAbMmOamgZgGoWISj1yKrQhqAC1JKVWUaqBorOJPqTZCLE8Br9iUi9QCBqFcRCGFGCKCMUJEpAJOSExIMDLl0mRAEggJoPlhEr79Y6+Bk2HmnD2TOXP2zPm8nmc/s/fat+/mkPM9e62111ZEYGZmVsku9Q7AzMyKz8nCzMyqcrIwM7OqnCzMzKwqJwszM6vKycLMzKpysjDrgaTbJc3Iue0IST+T9LykC2odm9lA27XeAZjVg6STgW91s+qPgXMi4osRcWwvDjkTeBrYKyJC0rkAEXFuL+MKYEJEtPdmP7Na852FNaSIuD4i9iifgDOAp4Ar+nDINwCrw0+52hDlZGEGSJoMXAycFBFPprKfSvpomj9F0j2Svilps6RfSzo6rbsamAF8XtILkt7V5djDJd0qaZOkjZLultSrf3uSxku6U9Izkp6WdL2klrTuVEk/KNv2YUn/Vba8VtKkPv2HMUtcDWUNL33p3gR8KSJ+WmHTQ9N2w4ETgPmS9ouIUyQBrIuIs9O2Pynb73PAOqA1LR8G9PYORMDXgJ8BewE3A+eS3Q3dBVyUEtCfALsBh6dr2x/YA1jRy/OZ7cB3FtbQlH3LXws8CPxrlc3XAxdHxNaIuBFYA7w3x2m2AiOBN6R97+5tdVVEtEfE4oh4MSI2ABcC70jrHgGeByYBfwksAp6QdEDa5u6IeKk35zPryncW1ujOBA4C3prjC7yjyzaPA6NynGMO2V3Aj9MdyNyIOL83QUoaAXwD+AtgT7Ifes+WbXIXcCTwxjS/iSxRHJ6WzXaK7yysYUk6Evhn4MSI2JRjl9HpTqTT64Enqu0UEc9HxOciYn/geOCzne0dvfBVsqqrN0fEXsDfkVVNdepMFn+R5u8iSxbvwMnC+oGThTUkSSOBG4AzImJZzt1eB3xa0jBJfwu8Cbgtx7neJ+mNKdFsBrYDlaqFdpO0e9nURHY38QKwWdJoYFaXfe4C3gk0R8Q64G7gGOC1QN7rM+uRk4U1qo8BI4BvpB5M5dPlPexzHzCB7HmKr5DdkTyT41wTyBq8XwB+AVwaEUsqbL8K2FI2nQqcB0whSzY/BOaX7xARv0nHvzstPwc8AtwTEdtzxGhWkdwt3Kw6SacAH42It9c7FrN68J2FmZlV5WRhZmZVuRrKzMyq8p2FmZlVNSQfyhs+fHiMGzeu3mGYmQ0qS5cufToiWrtbNySTxbhx42hra6t3GGZmg4qkx3ta52ooMzOrysnCzMyqcrIwM7OqnCzMzKwqJwszM6tqSPaGMjNrJAuWdTBn0Rqe2LSFUS3NzJo6kemTR/frOZwszMwGsQXLOpg9fyVbtmaDC3ds2sLs+SsB+jVhuBrKzGwQm7NozcuJotOWrduZs2hNv57HycLMbBB7YtOWXpX3lZOFmdkgNqqluVflfeVkYWY2iM2aOpHmYU07lDUPa2LW1In9eh43cJuZDWKdjdjuDWVmZhVNnzy635NDVzWvhpLUJGmZpFvT8n6S7pPULulGSbul8tek5fa0flzZMWan8jWSptY6ZjMz29FAtFl8BniobPnrwEUR8UbgWeC0VH4a8Gwqvyhth6QDgZOAg4BjgEsl7VhBZ2ZmNVXTZCFpDPBe4NtpWcBRwE1pk2uA6Wl+WlomrT86bT8NuCEiXoyIR4F24JBaxm1mZjuq9Z3FxcDngZfS8muBTRGxLS2vAzor2kYDawHS+s1p+5fLu9nnZZJmSmqT1LZhw4b+vg4zs4ZWs2Qh6X3A+ohYWqtzlIuIuRFRiohSa2u3bwU0M7M+qmVvqCOA4yUdB+wO7AV8A2iRtGu6exgDdKTtO4CxwDpJuwJ7A8+UlXcq38fMzAZAze4sImJ2RIyJiHFkDdR3RsTJwBLgxLTZDOCWNL8wLZPW3xkRkcpPSr2l9gMmAPfXKm4zM3u1ejxncSZwg6QvA8uAK1P5lcB1ktqBjWQJhohYJWkesBrYBpweEdtffVgzM6sVZT/eh5ZSqRRtbW31DsPMbFCRtDQiSt2t89hQZmZWlZOFmZlV5WRhZmZVOVmYmVlVThZmZlaVk4WZmVXlZGFmZlU5WZiZWVVOFmZmVpWThZmZVeVkYWZmVTlZmJlZVU4WZmZWlZOFmZlV5WRhZmZVOVmYmVlVThZmZlZVzZKFpN0l3S/pV5JWSTovlV8t6VFJy9M0KZVL0iWS2iWtkDSl7FgzJD2cphk9ndPMzGqjlu/gfhE4KiJekDQM+Lmk29O6WRFxU5ftjwUmpOlQ4DLgUEn7AucAJSCApZIWRsSzNYzdzMzK1OzOIjIvpMVhaar0wu9pwLVpv3uBFkkjganA4ojYmBLEYuCYWsVtZmavVtM2C0lNkpYD68m+8O9Lq76SqpoukvSaVDYaWFu2+7pU1lO5mZkNkJomi4jYHhGTgDHAIZL+DJgNHAC8DdgXOLM/ziVppqQ2SW0bNmzoj0OamVkyIL2hImITsAQ4JiKeTFVNLwL/CRySNusAxpbtNiaV9VTe9RxzI6IUEaXW1tZaXIaZWcOqmiwk/amkKyT9WNKdnVOO/VoltaT5ZuDdwK9TOwSSBEwHHky7LAQ+knpFHQZsjogngUXAeyTtI2kf4D2pzMzMBkie3lD/BVwOXAFs78WxRwLXSGoiS0rzIuLWlGxaAQHLgX9I298GHAe0A78HTgWIiI2SvgT8Mm33xYjY2Is4zMxsJymiUgclkLQ0It46QPH0i1KpFG1tbfUOw8xsUEnf96Xu1uVps/iBpE9KGilp386pn2M0M7MCy1MN1fnE9KyysgD27/9wzMysiKomi4jYbyACMTOz4srTG+qPJJ0taW5aniDpfbUPzczMiiJPm8V/An8A/jwtdwBfrllEZmZWOHmSxfiI+FdgK0BE/J6s26uZmTWIPMniD+mhugCQNJ5sRFkzM2sQeXpDnQP8CBgr6XrgCOCUWgZlZmbFkqc31GJJDwCHkVU/fSYinq55ZGZmVhh5ekN9MSKeiYgfRsStwMZ0h2FmZg0iT5vFWEmzAdK7J74PPFzTqMzMrFDyJIu/B96cEsYPgJ9GxLk1jcrMzAqlxzYLSVPKFr8BfAu4B7hL0pSIeKDWwZmZWTFUauC+oMvys8CBqTyAo2oVlJmZFUuPySIi3jmQgZiZWXHl6Q21t6QLO99vLekCSXsPRHBmZlYMeRq4rwKeB96fpufIxosyM7MGkecJ7vER8Tdly+dJWl6rgMzMrHjy3FlskfT2zgVJRwBbqu0kaXdJ90v6laRVks5L5ftJuk9Su6QbJe2Wyl+TltvT+nFlx5qdytdImtrbizQzs52TJ1n8A/Afkh6T9BjwTeDjOfZ7ETgqIt4CTAKOkXQY8HXgooh4I1kPq9PS9qcBz6byi9J2SDoQOAk4CDgGuFRSU87rMzOzfpAnWTyXvvAPBg6OiMlkbRgVReaFtDgsTZ1dbm9K5dcA09P8tLRMWn+0JKXyGyLixYh4FGgHDskRt5mZ9ZM8yeJmgIh4LiKeS2U3Vdj+ZZKaUvvGemAx8D/ApojYljZZB4xO86OBtelc24DNwGvLy7vZp/xcMzt7bG3YsCFPeGZmllOlJ7gPIKv62VvSCWWr9gJ2z3PwiNgOTJLUQjam1AE7EWu1c80F5gKUSqWo1XnMzBpRpd5QE4H3AS3AX5WVPw98rDcniYhNkpYAhwMtknZNdw9jyF7TSvo7FlgnaVdgb+CZsvJO5fuYmdkAqPQE9y3ALZIOj4hf9PbAklqBrSlRNAPvJmu0XgKcCNwAzABuSbssTMu/SOvvjIiQtBD4rqQLgVHABOD+3sZjZmZ9l+flR71OFMlI4JrUc2kXYF5E3CppNXCDpC8Dy4Ar0/ZXAtdJagc2kvWAIiJWSZoHrAa2Aaen6i0zMwMWLOtgzqI1PLFpC6Nampk1dSLTJ7+qaXenKGLoVe+XSqVoa2urdxhmZjW3YFkHs+evZMvWV35DNw9r4msnvLnXCUPS0ogodbcuT28oMzMrqDmL1uyQKAC2bN3OnEVr+vU8eQYSHCHpSkm3p+UDJZ1WbT8zM6u9JzZ1P6BGT+V9lefO4mpgEVnjMsBvgDP6NQozM+uTUS3NvSrvqzzJYnhEzANegpcfmHMDs5lZAcyaOpHmYTuOgNQ8rIlZUyf263nyjDr7O0mvJRuqgzS+0+Z+jcLMzPqksxG71r2h8iSLz5I9AzFe0j1AK9lzEGZmVgDTJ4/u9+TQVZ7nLB6Q9A6yJ7oFrImIrTWNyszMCiXPnQVko7yOS9tPkUREXFuzqMzMrFCqJgtJ1wHjgeW80rAdgJOFmVmDyHNnUQIOjKH4qLeZmeWSp+vsg8Cf1DoQMzMrrkrvs/gBWXXTnsBqSfeTvSoVgIg4vvbhmZlZEVSqhvq3AYvCzMwKrdL7LO4CkPT1iDizfJ2krwN31Tg2MzMriDxtFu/upuzY/g7EzMyKq1KbxSeATwL7S1pRtmpP4J5aB2ZmZsVRqc3iu8DtwNeAs8rKn4+IjTWNyszMCqXHaqiI2BwRj0XEByPi8bIpV6KQNFbSEkmrJa2S9JlUfq6kDknL03Rc2T6zJbVLWiNpaln5MamsXdJZ3Z3PzMxqJ+9wH32xDfhcGltqT2CppMVp3UURsUNvK0kHkr13+yCyd2f8RNKfptX/QdZ2sg74paSFEbG6hrGbmVmZSm0Wr4mIF3taX01EPAk8meafl/QQUGlYxGnADemcj0pqJxuTCqA9Ih5Jcd2QtnWyMDMbIJV6Q/0CXh4baqdIGgdMBu5LRZ+StELSVZL2SWWjgbVlu61LZT2Vdz3HTEltkto2bNiwsyGbmVmZSsliN0kfAv5c0gldp7wnkLQHcDNwRkQ8B1xGNjDhJLI7jwt2Iv6XRcTciChFRKm1tbU/DmlmZkmlNot/AE4GWoC/6rIugPnVDi5pGFmiuD4i5gNExFNl668Abk2LHcDYst3HpDIqlJuZ2QCo9AT3z4GfS2qLiCt7e2BJAq4EHoqIC8vKR6b2DIC/JhuoELK38X1X0oVkDdwTgPvJXrg0QdJ+ZEniJOBDvY3HzMz6Lk9vqOskfRr4y7R8F3B5jrflHQF8GFgpaXkq+wLwQUmTyO5OHgM+DhARqyTNI2u43gacHhHbASR9ClgENAFXRcSqnNdnZmb9QNVeUyHp28Aw4JpU9GFge0R8tMax9VmpVIq2trZ6h2FmNqhIWhoRpe7W5bmzeFtEvKVs+U5Jv+qf0MzMbDDIM5DgdknjOxck7c8rr1c1M7MGkOfOYhawRNIjZI3NbwBOrWlUZmZWKFWTRUTcIWkCMDEVrdmZJ7vNzGzwyTU2VEoOK6puaGZmQ1KeNgszM2twThZmZlZV1WQhab6k90pyYjEza1B5EsClZMNrPCzpfEkTq+1gZmZDS9VkERE/iYiTgSlkw3P8RNJ/Szo1DRRoZmZDXK6qJUmvBU4BPgosA75BljwWV9jNzMyGiKpdZyV9n+wZi+uAvyobMfZGSR6AycysAeR5zuKSiFjS3YqeBpwyM7OhJU811IGSWjoXJO0j6ZM1jMnMzAomT7L4WERs6lyIiGeBj9UuJDMzK5o8yaIpvfUOAElNwG61C8nMzIomT5vFj8gas7+Vlj+eyszMrEHkubM4E1gCfCJNdwCfr7aTpLGSlkhaLWmVpM+k8n0lLZb0cPq7TyqXpEsktUtaIWlK2bFmpO0fljSjLxdqZmZ9l2eI8peAy9LUG9uAz0XEA5L2BJZKWkz2vMYdEXG+pLOAs8gS0rHAhDQdms53qKR9gXOAEtl7u5dKWpjaTszMbADkGRvqiHQH8BtJj0h6NL0IqaKIeDIiHkjzzwMPAaOBabzyPu9rgOlpfhpwbWTuBVokjQSmAosjYmNKEIuBY3p5nWZmthPytFlcCfwjsJQ+vk5V0jhgMnAfMKLswb7fAiPS/Ghgbdlu61JZT+VdzzETmAnw+te/vi9hmplZD/K0WWyOiNsjYn1EPNM55T2BpD2Am4EzIuK58nUREWRVSzstIuZGRCkiSq2trf1xSDMzS/IkiyWS5kg6XNKUzinPwdNAgzcD10fE/FT8VKpeIv1dn8o7gLFlu49JZT2Vm5nZAMlTDXVo+ls+tEcAR1XaKT2bcSXwUERcWLZqITADOD/9vaWs/FOSbkjn3BwRT0paBHy1s9cU8B5gdo64zcysn+TpDfXOPh77CODDwEpJy1PZF8iSxDxJpwGPA+9P624DjgPagd8Dp6bzb5T0JeCXabsvRsTGPsZkZmZ9oKzZoMIG0gjgq8CoiDhW0oHA4RFx5UAE2BelUina2jwgrplZb0ha2tMAsXnaLK4GFgGj0vJvgDP6JzQzMxsM8iSL4RExD3gJICK20ccutGZmNjjlSRa/S2/KCwBJhwGbaxqVmZkVSp7eUJ8l66k0XtI9QCtwYk2jMjOzQsnTG+oBSe8ge7WqgDURsbXmkZmZWWHkeQf3R7oUTZFERFxbo5jMzKxg8lRDva1sfnfgaOABwMnCrAEtWNbBnEVreGLTFka1NDNr6kSmT37VcG02xOSphvp/5cvpfdw31CwiMyusBcs6mD1/JVu2Zh0iOzZtYfb8lQBOGENcnt5QXf0O2K+/AzGz4puzaM3LiaLTlq3bmbNoTZ0isoGSp83iB7wyMuwuwIHAvFoGZWbF9MSmLb0qt6EjT5vFv5XNbwMej4h1NYrHzApsVEszHd0khlEtzXWIxgZS1WqoiLirbLrHicKscc2aOpHmYU07lDUPa2LW1Il1isgGSp5qqOfp/gVFInt/0V79HpWZFVJnI7Z7QzWePNVQFwNPAteRJYiTgZER8S+1DMzMimn65NFODg0oT2+o4yPi0oh4PiKei4jLgGm1DszMzIoj70CCJ0tqkrSLpJPJus+amVmDyJMsPkT2Nrun0vS3qczMzBpEnt5Qj0XEtIgYHhGtETE9Ih6rtp+kqyStl/RgWdm5kjokLU/TcWXrZktql7RG0tSy8mNSWbuks/pwjWZmtpOqJgtJfyrpjs4vfUkHSzo7x7GvBo7ppvyiiJiUptvSMQ8ETgIOSvtcmqq9moD/AI4lexjwg2lbMzMbQHmqoa4AZgNbASJiBdkXe0UR8TNgY844pgE3RMSLEfEo0A4ckqb2iHgkIv5ANiaVG9fNzAZYnmTxRxFxf5eybTtxzk9JWpGqqfZJZaOBtWXbrEtlPZW/iqSZktoktW3YsGEnwjMzs67yJIunJY3nldeqnkj23EVfXAaMByalY1zQx+O8SkTMjYhSRJRaW1v767BmZka+h/JOB+YCB0jqAB4lezCv1yLiqc55SVcAt6bFDmBs2aZjUhkVys3MbIBUTBaSdgFKEfEuSX8M7BIRz/f1ZJJGRkTnXclfA509pRYC35V0ITAKmADcT/bE+ARJ+5EliZNwt10zswFXMVlExEuSPg/Mi4hePYgn6XvAkcBwSeuAc4AjJU0iq9J6DPh4Os8qSfOA1WTtIadHxPZ0nE8Bi4Am4KqIWNWbOMzMbOcporsxAss2kM4HngZupOzJ7YjI29NpwJVKpWhra6t3GGZmg4qkpRFR6m5dnjaLD6S/p5eVBbD/zgZmZmaDQ553cPsVqmZmDa7HrrOSvlo2/+6BCcfMzIqo0nMW5UN1fL3WgZiZWXHleSjPzMwaXKU2i9dJ+izZsw6d8y+LiAtrGpmZmRVGpWRxBbBnN/NmZtZgekwWEXHeQAZiZmbF5TYLMzOrysnCzMyqcrIwM7OqepUsJN1afSszMxtqentn0e1b6szMbGjrbbJYVpMozMys0HqVLCLi72sViJmZFZcbuM3MrConCzMzq6rSEOXXpb+fGbhwzMysiCrdWbxV0ijg7yXtI2nf8qnagSVdJWm9pAfLyvaVtFjSw+nvPqlcki6R1C5phaQpZfvMSNs/LGnGzlysmZn1TaVkcTlwB3AAsLTLlOcF11ez4zsxAM4C7oiICenYZ6XyY4EJaZoJXAZZcgHOAQ4FDgHO6UwwZmY2cHpMFhFxSUS8CbgqIvaPiP3Kpqrv346InwEbuxRPA65J89cA08vKr43MvUCLpJHAVGBxRGyMiGeBxbw6AZmZWY1VbeCOiE/04/lGRMSTaf63wIg0PxpYW7bdulTWU/mrSJopqU1S24YNG/oxZDMzq1tvqIgIIPrxeHMjohQRpdbW1v46rJmZMfDJ4qlUvUT6uz6VdwBjy7Ybk8p6KjczswE00MliIdDZo2kGcEtZ+UdSr6jDgM2pumoR8J7UG2sf4D2pzMzqZMGyDo44/072O+uHHHH+nSxY5t9vjaDSa1V3iqTvAUcCwyWtI+vVdD4wT9JpwOPA+9PmtwHHAe3A74FTASJio6QvAb9M230xIro2mtsQtmBZB3MWreGJTVsY1dLMrKkTmT7Z41nWy4JlHcyev5ItW7cD0LFpC7PnrwTw5zLEKWs6GFpKpVK0teXp3WtF1vWLCaB5WBNfO+HN/mKqkyPOv5OOTVteVT66pZl7zjqqDhFZf5K0NCJK3a3zcB9WWHMWrdkhUQBs2bqdOYvW1Ckie6KbRFGp3IYOJwsrLH8xFc+oluZeldvQ4WRhheUvpuKZNXUizcOadihrHtbErKkT6xSRDRQnCyssfzEVz/TJo/naCW9mdEszImurcBtSY6hZbyizndX5BeTeUMUyffJofwYNyMnCCs1fTGbF4GooMzOryncWVmh+KM+sGJwsrLD8tLBZcbgaygrLD+WZFYeThRWWH8ozKw4nCyssP5RnVhxOFlZYfijPrDjcwG2F5YfyzIrDycIKzQ/lmRWDq6HMzKwqJwszM6uqLslC0mOSVkpaLqktle0rabGkh9PffVK5JF0iqV3SCklT6hGzmVkjq+edxTsjYlLZK/zOAu6IiAnAHWkZ4FhgQppmApcNeKRmZg2uSA3c04Aj0/w1wE+BM1P5tZG9LPxeSS2SRkbEk3WJ0qzBebyuxlSvO4sAfixpqaSZqWxEWQL4LTAizY8G1pbtuy6V7UDSTEltkto2bNhQq7jNGlrneF0dm7YQvDJe14JlHfUOzWqsXncWb4+IDkmvAxZL+nX5yogISdGbA0bEXGAuQKlU6tW+Vlz+FVsslcbr8ucytNUlWURER/q7XtL3gUOApzqrlySNBNanzTuAsWW7j0llNsR51Nni8XhdjWvAq6Ek/bGkPTvngfcADwILgRlpsxnALWl+IfCR1CvqMGBzLdsrzl6wkvGzb2PcWT9k/OzbOHvBylqdyqrwqLPF4/G6Glc92ixGAD+X9CvgfuCHEfEj4Hzg3ZIeBt6VlgFuAx4B2oErgE/WKrCzF6zkO/f+L9sjq8XaHsF37v1fJ4w68a/Y4vF4XY1rwKuhIuIR4C3dlD8DHN1NeQCnD0BofO++tT2Wf3n6mwciBCszqqWZjm4Sg3/F1o/H62pcReo6W3eddxR5y622Zk2duEObBfhXbBF4vK7G5GRRpknqNjE0SXWIxvwr1qw4nCzKfPDQsXzn3v/tttzqw79izYrByaJMZ7vE9+5by/YImiQ+eOhYt1eYWcNTDMH6+FKpFG1tbfUOw8xsUJG0tGy8vh14iHIzM6vKycLMzKpysjAzs6qcLMzMrConCzMzq2pI9oaStAF4fCcPMxx4uh/CqTdfR/EMlWsZKtcBQ+dadvY63hARrd2tGJLJoj9IauupC9lg4usonqFyLUPlOmDoXEstr8PVUGZmVpWThZmZVeVk0bO59Q6gn/g6imeoXMtQuQ4YOtdSs+twm4WZmVXlOwszM6vKycLMzKpq6GQh6SpJ6yU92MN6SbpEUrukFZKmDHSMeeS4jiMlbZa0PE3/MtAx5iFprKQlklZLWiXpM91sM1g+kzzXUvjPRdLuku6X9Kt0Hed1s81rJN2YPpP7JI0b+Egry3kdp0jaUPZ5fLQeseYlqUnSMkm3drOu/z+TiGjYCfhLYArwYA/rjwNuBwQcBtxX75j7eB1HArfWO84c1zESmJLm9wR+Axw4SD+TPNdS+M8l/XfeI80PA+4DDuuyzSeBy9P8ScCN9Y67j9dxCvDNesfai2v6LPDd7v4fqsVn0tB3FhHxM2BjhU2mAddG5l6gRdLIgYkuvxzXMShExJMR8UCafx54COj6mrzB8pnkuZbCS/+dX0iLw9LUtVfMNOCaNH8TcLRUrHcR57yOQUPSGOC9wLd72KTfP5OGThY5jAbWli2vYxD+g08OT7fgt0s6qN7BVJNumyeT/QIsN+g+kwrXAoPgc0nVHcuB9cDiiOjxM4mIbcBm4LUDG2V1Oa4D4G9S9eZNkor8PuWLgc8DL/Wwvt8/EyeLxvAA2ZgvbwH+HVhQ53gqkrQHcDNwRkQ8V+94dkaVaxkUn0tEbI+IScAY4BBJf1bvmPoix3X8ABgXEQcDi3nll3mhSHofsD4ilg7keZ0sKusAyn9djEllg0pEPNd5Cx4RtwHDJA2vc1jdkjSM7Mv1+oiY380mg+YzqXYtg+lzAYiITcAS4Jguq17+TCTtCuwNPDOw0eXX03VExDMR8WJa/Dbw1oGOLacjgOMlPQbcABwl6Ttdtun3z8TJorKFwEdSD5zDgM0R8WS9g+otSX/SWV8p6RCyz71w/5hTjFcCD0XEhT1sNig+kzzXMhg+F0mtklrSfDPwbuDXXTZbCMxI8ycCd0ZqWS2KPNfRpe3reLJ2psKJiNkRMSYixpE1Xt8ZEX/XZbN+/0x23ZmdBztJ3yPrkTJc0jrgHLKGLyLicuA2st437cDvgVPrE2llOa7jROATkrYBW4CTivaPOTkC+DCwMtUtA3wBeD0Mrs+EfNcyGD6XkcA1kprIktm8iLhV0heBtohYSJYUr5PUTtbR4qT6hdujPNfxaUnHA9vIruOUukXbB7X+TDzch5mZVeVqKDMzq8rJwszMqnKyMDOzqpwszMysKicLMzOrysnChgxJ50r6vaTXlfMixYQAAAQbSURBVJW9UGmfXhx7nHoY1bc/pdFCf5JGPf1Arc9XJZbjJZ1VzxisOBr6OQsbkp4GPgecWe9AyknaNY3RU81kgDQsRV2l/voL6x2HFYPvLKxu0q/1X0u6WtJvJF0v6V2S7pH0cHqqubeuAj4gad9uzvVg2fI/STo3zf9U0kWS2iQ9JOltkuanGL5cdphdU4wPpYHm/ijt/1ZJd0laKmlR55PA6bgXS2oDdnifhaR9JS1Ig9bdK+ngdEf0HeBt6c5ifJd9csUp6e+UvbthuaRvpQfRkHRZ2neH9zlIekzSeZIekLRS0gGp/BRJ30zzVyt7j8h/S3pE0ompfBdJl6bPcbGk28rWna/sfR4rJP1bHz5LKxAnC6u3NwIXAAek6UPA24F/InviubdeIEsYr3rZUBV/iIgScDlwC3A68GfAKZI6R+ucCFwaEW8CngM+qWz8p38HToyIt6Zzf6XsuLtFRCkiLuhyvvOAZWnQui+QDbu+HvgocHdETIqI/+ltnJLeBHwAOCLdnWwHTk77/nPa92DgHZIOLjvu0xExBbiM7L99d0aSfTbvA85PZScA44ADyZ5YPxwg/Tf7a+CgdI1f7nowG1xcDWX19mhErASQtAq4IyJC0kqyL6G+uARY3stfs53VLSuBVZ3jTUl6hGxAtk3A2oi4J233HeDTwI/IvqwXKxvmqQkoH6vqxh7O93bgbwAi4s70Rb9XP8T5drIB8H6Z4mkmG5Ib4P2SZpL9ux9J9gW/Iq3rHOhwKVkC6M6CiHgJWC1pRNl1/Fcq/62kJal8M/D/gSuVvcntVW9zs8HFycLq7cWy+ZfKll+im/8/Jf0nWb3+ExFxXHcHjIhNkr5L9qu70zZ2vJPevYc4ymPoGkfXsXGC7A1sqyLi8O5iAX7XQ3lfVYtTwDURMbt8J0n7kd0xvC0inpV0NTv+N+g81nZ6/l4oP1/FF+lExLZUjXg02RhYnwKOqrSPFZuroWxQiYhTUxVNt4mizIXAx3nli+8p4HXpF/xryKpSeuv1kjqTwoeAnwNrgNbOcknDlO8lRneTqockHUlWDdQf7+64Azixs0dYaht5A7AXWeLanO4Kju2HcwHcQ/bCoF3ScY9M590D2DsNvf6PwFv66XxWJ76zsCEpIp6W9H2yLyoiYquyUTnvJxvrv+sw23msAU6XdBWwGrgsIv6QGnQvkbQ32b+pi4FVVY51LnCVpBVko+fOqLx5PhGxWtLZwI8l7QJsBU6PiHslLSO77rVkX/L94Wayu4fV6bgPkFVB7QncIml3sruQz/bT+axOPOqsme0USXtExAupUft+ssb139Y7LutfvrMws511q7IXC+0GfMmJYmjynYWZmVXlBm4zM6vKycLMzKpysjAzs6qcLMzMrConCzMzq+r/AMj9uXzHC0BgAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"meanings = {'to':4,'the':4 ,'raha':2, 'diya':3, 'my':2, 'for':3, 'com':3, 'do':2, 'india':1,'time':1}\n",
"m = [meanings[lst[i][0]] for i in range(10)]\n",
"f = [lst[i][1] for i in range(10)] \n",
"plt.scatter(m, f)\n",
"plt.xlabel('m - Number of meanings')\n",
"plt.ylabel('f - Frequency of the token')\n",
"plt.title('Zipf\\'s Law')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 3. Heaps' Law"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"vsize = 0\n",
"num_tokens = 0\n",
"unique_tokens = []\n",
"V = []\n",
"N = []\n",
"\n",
"for i in range(len(tokens)):\n",
" s = tokens[i]\n",
" if s not in unique_tokens:\n",
" unique_tokens.append(s)\n",
" vsize += 1\n",
" V.append(vsize)\n",
" N.append(i+1)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"N = np.array(N)\n",
"V = np.array(V)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0.5, 1.0, \"Heaps' Law\")"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZIAAAEWCAYAAABMoxE0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3dd3zV1f3H8deHjWxkGPZUQBliBNyoLSJqsdYqOOvCOqq2tRVbq3b82tq6994D92hFERVQVEYYyoawiUDYhE2Sz++P74leaSCXJDc3N3k/H4/7uN97vuvz5YZ88j3nfM8xd0dERKS4qiQ7ABERSW1KJCIiUiJKJCIiUiJKJCIiUiJKJCIiUiJKJCIiUiJKJCIiUiJKJCKAmS0xsx/tUfYLMxufrJhCDHt90KuwmEWSQYlERERKRIlEJE5m1sLM3jSzNWa22Myui1nXx8y+MrONZrbSzB40sxox693MrjOzRWa21sz+bWZVwrpOZjbOzDaFda+WMM5GZvbfEOeGsNwqrDvRzGbEbDvazCbHfP7czM4syfml8lEiEYlD+KX/H+BroCVwMnCDmZ0SNskDfg00AY4K66/e4zA/BdKB3sBg4NJQ/lfgI6AR0Ap4oGAHd7dihFsFeAZoC7QBtgMPhnUTgM5m1sTMqgM9gBZmVs/Maof4Pi/GOaUSq5bsAETKkXfMLDfmcw1galg+Emjq7n8JnxeZ2RPAEGCUu0+J2W+JmT0GnADcG1N+h7uvB9ab2b3AUOBJYDfRL/0W7r4CKFG7jLuvA94s+Gxm/weMCeu2hzuQ44FviRLjRuAYYCewIOwvEjfdkYh870x3b1jw4od3FG2J/nLfWPAC/gA0BzCzg0MV0ioz2wz8nejuJNbymOWlQIuw/HvAgElmNsvMLqUEzOwAM3vMzJaGWD4DGppZ1bDJOKA/UTIZB4wlSnonhM8i+0WJRCQ+y4HFsYnG3eu5+6Cw/hFgLtDZ3esTJZk9q6Vaxyy3IbojwN1XufsV7t4CuBJ42Mw6lSDW3wKHAH1DLMeH8oJ49kwk41AikRJQIhGJzyQgx8xuMrPaZlbVzA4zsyPD+nrAZmCLmXUBrirkGL8LDeGtgeuBVwHM7OcFjeHABsCB/Djjqm5mtWJe1UIs24GNZtYYuG2Pfb4kSjR9gEnuPovojqsv0d2LyH5RIhGJg7vnAacDvYDFwFqi9o0GYZMbgfOAHOAJQpLYw7vAFGA68D7wVCg/EphoZluA94Dr3X1RnKGNJEoaBa/bidplaocYJwAf7nEtW4nafma5+65Q/BWw1N2z4zyvyHdME1uJJF54sLCzu2cmOxaR0qY7EhERKRElEhERKRFVbYmISInojkREREqk0j3Z3qRJE2/Xrl2ywxARSSlTpkxZ6+5NC1tX6RJJu3btyMjISHYYIiIpxcyW7m2dqrZERKRElEhERKRElEhERKRElEhERKRElEhERKRElEhERKRElEhERKRElEhERCq4FRu28cRni/gic21Cjl/pHkgUEakMMrO3MHr2aj6YuZJvVmwC4Kr+HTmm054zQJecEomISAWxKzefUbNW8cKEpUxavB6A7i0bcNPALpzWPY02Bx6QkPMqkYiIpLDcvHy+WLiO97/5lv9+s5Jtu/Jo0/gAbj61Cz/p1YK0BrUTHoMSiYhICpr97WZenLiUj2atYu2WXdSuXpUBhzZncK8W9D+4GVWqWJnFokQiIpIisnN28OKEZbz/zbcsXLOVmtWq8ONuzTm9Rxr9D2lGrepVkxKXEomISDm2Oy+fMXOzeWXSMsZnrmV3nnNc5yac37ctZ/VuScMDaiQ7RCUSEZHyKGvjdt6asoIRk5eTtXE7zerV5NJj2nPuka3p0LRussP7ASUSEZFywt35bMFanv9yCWPnryEv3+nTvjG3ntGNk7s0o1rV8vnonxKJiEiS7czN4z9fr+ThsZksWrOV5vVrctmx7TmvTxvaNamT7PCKpEQiIpIkC9ds4dXJy3ktYzkbt+2mW1p9/v7T7vzsiJbUrJachvPiUCIRESlDO3Pz+GRONo+MXciMrE1UrWIM6Nacc49szXGdm1K1DLvtlhYlEhGRMjBn5WaeGr+YD2euYsvOXFo0qMUtp3XlJz1b0Kx+rWSHVyJKJCIiCbJp+25GzVzFG1NWMGnJeurUqMqp3dMY1P0gju/ctNw2nu+vhCYSM6sFfAbUDOd6w91vM7P2wAjgQGAKcKG77zKzmsDzwBHAOuBcd18SjnUzcBmQB1zn7qNC+UDgPqAq8KS7/zOR1yQiUpTM7Bzu+XgBH85cRV6+0/bAAxh+aheGHNm6XDz3UdoSfUeyEzjJ3beYWXVgvJl9APwGuMfdR5jZo0QJ4pHwvsHdO5nZEOAO4Fwz6wYMAQ4FWgAfm9nB4RwPAT8GVgCTzew9d5+d4OsSEfmB3Lx8Pp6TzbNfLmbCouju49Jj2nFajxb0bNUAs9Rr+4hXQhOJuzuwJXysHl4OnAScF8qfA24nSiSDwzLAG8CDFv3rDwZGuPtOYLGZZQJ9wnaZ7r4IwMxGhG2VSESkTKzbspMRk5fzzBdLWLtlJwfVr8WNAw5maJ82HFi3ZrLDKxMJbyMxs6pE1VediO4eFgIb3T03bLICaBmWWwLLAdw918w2EVV/tQQmxBw2dp/le5T3TcBliIh8Jy/f+XjOal7PWMFn89ewKy+fYzs14YJ+h/GjruX3wcFESXgicfc8oJeZNQTeBrok+px7MrNhwDCANm3alPXpRaSCWLlpO29NzeKlCUv5dtMOmtaryYVHtWXIka3p3LxessNLmjLrteXuG81sDHAU0NDMqoW7klZAVtgsC2gNrDCzakADokb3gvICsfvsrTz23I8DjwOkp6d7qV2UiFR423flMW7+Gt6cuoJP5qwm3/l+2JKuzaleye4+CpPoXltNgd0hidQmahS/AxgDnE3Uc+ti4N2wy3vh81dh/afu7mb2HvCymd1N1NjeGZgEGNA59ALLImqQL2h7EREptuXrt/Hk54t4c2oWW3bmcmCdGlxxfAfOTS9/gyYmW6LvSNKA50I7SRXgNXf/r5nNBkaY2d+AacBTYfungBdCY/p6osSAu88ys9eIGtFzgWtClRlmdi0wiqj779PuPivB1yQiFZS780XmOp4cv4ix89ZgBmf0aME56a3p26Gx7j72wqKOVZVHenq6Z2RkJDsMESlHNm3bzetTlvPypGUsWrOVA+vU4Px+bTknvRWtGiVmnvNUY2ZT3D29sHV6sl1EKq3l67fxxOeLeHtqFjk7c+ndpiH/OrsHg3u1SKlBE5NNiUREKpUdu/P47zcreWPKciYsWk/1qsag7mlcfmwHurdqkOzwUpISiYhUCvNW5fDKpGW8Oz2LDdt207pxbW4ccDA/7d2Klg1rJzu8lKZEIiIV1sZtu3hrahZvTl3BrG83U72qMeDQgzi/TxuO6nhghR62pCwpkYhIhZKzYzdj563hlUnLmLBoHfkO3dLqc+vp3Rjcq0WlGbakLCmRiEiFMH91Dg9+msnIGSvJzXdaNqzN1f07MfCwgzispdo+EkmJRERSVn4Y82rE5OWMmZdNnRrVuKBfWwYedhBHtmuckrMNpiIlEhFJObvz8nn/m5U8OCaTzOwtNK1XkyuP78jlx7WniaquypwSiYikjGnLNvD2tCzenpZFzo5cDm5el/uHHs6gww6qdCPulidKJCJS7n2zYiP/HjWPzxespWa1Kgw49CAG92zBSV2aUUXVV0mnRCIi5dbcVZt5eMxC3vv6WxoeUJ0bBxzML45pT92a+tVVnujbEJFyZ9Li9Tzx+SJGz15NrepVuKp/R67u35F6taonOzQphBKJiJQL+fnOmHnZPDpuIZOXbKDhAdW59sROXHZsexrVqZHs8GQflEhEJKl27M7jnWlZvDRxGTOyNpHWoBa3nt6NoX3aULuGBk5MBUokIpIUm7bv5sUJS3nuyyVk5+ykfZM6/OvsHvykZwtqVVcCSSVKJCJSpjZs3cULE5byzBeL2bBtN8d1bsLd5/TimE4a+ypVKZGISJlYvn4b94yez3+/WcmuvHyO7dSEmwZ20dDtFYASiYgk1LxVOTw2biHvfv0t1asa5xzZigv7teOQg+olOzQpJUokIpIQo2ev5qExmUxfvpE6NapyYb+2XH5ce01dWwEpkYhIqZq8ZD33fbyA8Zlr6dC0Dn8Y1IWf9W6l4dsrMCUSESmx/HxnfOZaHvtsIV9krqNJ3ZrcfGoXLjmmPTWqaQysii6uRGJmvwJedPcNCY5HRFKIuzNyxioeGpPJ7JWbaVK3Jn8Y1IXz+7aljoYxqTTi/VOhOTDZzF4zs4EWRx89M2ttZmPMbLaZzTKz60P57WaWZWbTw2tQzD43m1mmmc0zs1NiygeGskwzGx5T3t7MJobyV81Mj7+KlIHcvHxGzVrF6Q+M55qXp5Kzczf/PrsHXww/kWHHd1QSqWTM3ePbMEoeA4BLgHTgNeApd1+4l+3TgDR3n2pm9YApwJnAOcAWd79zj+27Aa8AfYAWwMfAwWH1fODHwApgMjDU3Web2WvAW+4+wsweBb5290f2dR3p6emekZER1zWLyA+5O6Nnr+av789m+frttGl8AL86qRNn9W6lSaQqODOb4u7pha2L+88Gd3czWwWsAnKBRsAbZjba3X9fyPYrgZVhOcfM5gAt93GKwcAId98JLDazTKKkApDp7ovCxYwABofjnQScF7Z5Drgd2GciEZH9l5cfJZCnv1jMpMXr6dCkDg+d15sfd2uuNhCJu43keuAiYC3wJPA7d99tZlWABcD/JJI99m8HHA5MBI4BrjWzi4AM4Leh7aUlMCFmtxV8n3iW71HeFzgQ2OjuuYVsv+f5hwHDANq0aVP0BYsIADtz83hzShbPfrmY+au30Lx+TW4/oxtD+rTRMCbynXjvSBoBZ7n70thCd883s9P3taOZ1QXeBG5w981m9gjwV8DD+13Apfsd+X5w98eBxyGq2krkuUQqgm27cnl18nIeHruQNTk76ZpWn7vP6ckZPVtQXTMRyh6KTCRmVhUY4u63F7be3efsY9/qREnkJXd/K2y/Omb9E8B/w8csoHXM7q1CGXspXwc0NLNq4a4kdnsRKYZtu3J55oslPDpuITk7cunbvjF3n9OTYzs10ThYsldFJhJ3zws9ptq4+7J4Dxwa558C5rj73THlaaH9BOCnwMyw/B7wspndTdTY3hmYBBjQ2czaEyWKIcB5oc1mDHA2MAK4GHg33vhE5HuL1mzhsXGLeH/GSrbszOVHXZtz5QkdSG/bSAlEirQ/VVuzzGwSsLWg0N1/so99jgEuBGaY2fRQ9gdgqJn1IqraWgJcGY41K/TCmk3UmH+Nu+cBmNm1wCigKvC0u88Kx7sJGGFmfwOmESUuEYnT4rVbuf+TBbw9LYsa1arwk54tGHJka9LbNU52aJJC4ur+a2YnFFbu7uNKPaIEU/dfEcjauJ2Hx2QyYvJyqlc1LjqqHZcf155m9WolOzQpp0rc/TcVE4aI/K/szTt4ZNxCXpq4jPx8Z8iRrbn+5M40q68EIsUXb/fffsADQFegBlEV01Z3r5/A2ESklCxeu5UXvlrKaxnL2bYrl58e3opf/7izRuKVUhFvG8mDRI3crxM91X4R3z91LiLlVGb2Fh4em8k707IwMwZ1T+P6kzvRqZnmApHSsz9PtmeaWdXQAP6MmU0Dbk5caCJSXNk5O3hq/GKe/HwxBlxyTHuuPKGD2kAkIeJNJNvCgIjTzexfREOf6KkkkXJm3qocnh6/mHemZ7E7L58zerbgT6d3o4nmApEEijeRXEjULnIt8GuiBwR/lqigRCR+7s64+Wt4/LNFfLlwHbWqV+HMXi254vgOdGpWN9nhSSUQb6+tgqFRtgN/Tlw4IhKv/Hznk7nZPDI2k6nLNpLWoBY3DjiY8/u2pVEdzaggZWeficTMZhA9OFgod+9R6hGJyD7l5zsfzlrFPaPnsyB7C60a1eYvgw9laJ82GgdLkqKoO5J9DsgoImUnNy+fd6Z/y5OfL2Luqhw6NKnDXT/vyeBeLaimBCJJtM9EsudovyKSHJ/OXc0/P5jL/NVb6HJQPf51dg/OOrylEoiUC/E+kJjD91VcNYDq6IFEkYT7bP4a7v9kARlLN9C+SR3uG9KLn/RsoYEUpVyJt7H9u6eXwqi+g4F+iQpKpLLLWLKeez6ezxeZ62jRoBa3nt6NC/q11WyEUi7F/UBiAY9GeXzHzG4Dhpd+SCKVU8F86E9+vphJS9ZzYJ0a3HJaVy48qi01q2k2Qim/4q3aOivmYxWiYVJ2JCQikUpoxopN/OW/s5i8ZANpDWrxx0FdOb9fGw6osd9/64mUuXh/Ss+IWc4lmkdkcKlHI1LJLF23lQc+zeTNqSs4sE4N/nrmYQw9srUa0SWlxNtGckmiAxGpTL5evpGHx2by0ezVVDXj8mPbc+2JnWlwQPVkhyay3+Kt2uoA3EfUwO7AV8Cv3X1RAmMTqXCyc3bwz5FzeWtaFg0PqM7V/TtyQb+2pDWonezQRIot3qqtl4GHiOZYh2hI+VeAvokISqSi2b4rj6fGL+KRsQvZnef88oSOXH1iR+rX0h2IpL54E8kB7v5CzOcXzex3iQhIpCLZuG0Xz3yxhJcmLmXtll2c1KUZfxjUVYMpSoVS1FhbjcPiB2Y2HBhBVLV1LjAywbGJpKzsnB28MnE5T41fxOYduZzUpRnDju9Avw4HJjs0kVJX1B3JFKLEUfAY7ZUx6xxNbCXyA1t25vLsF4t5ZOxCtu7K4+QuzfjNgIM5tEWDZIcmkjBFjbXVviQHN7PWwPNAc6LE87i73xfudF4F2hF1JT7H3TeEp+bvAwYB24BfuPvUcKyLgVvCof/m7s+F8iOAZ4HaRHdJ14eHJkXKTG5ePi9NXMa9H89nw7bdnNSlGTef2oXOzTWlrVR8cT/tZGaHAd2A7+bqdPfni9gtF/itu081s3rAFDMbDfwC+MTd/xmqzIYDNwGnAp3Dqy/wCNA3JJ7biB6E9HCc99x9Q9jmCmAiUSIZCHwQ73WJlNS0ZRv407szmZm1mX4dGjP81K70at0w2WGJlJl4u//eBvQnSiQjiX7hjye629grd19JNC0v7p5jZnOAlkQPM/YPmz0HjCVKJIOB58MdxQQza2hmaWHb0e6+PsQzGhhoZmOB+u4+IZQ/D5yJEomUgayN2/m/92czcsYqmtStyQNDD+f0HmkaUFEqnXjvSM4GegLT3P0SM2sOvLg/JzKzdsDhRHcOzUOSAVhFVPUFUZJZHrPbilC2r/IVhZTvee5hwDCANm3a7E/YIv8jNy+ft6Zm8fcP5rBjdx7XntiJX/bvSN2aGs5EKqd4f/K3u3u+meWaWX0gm2je9riYWV3gTeAGd98c+xebu7uZJbRNw90fBx4HSE9PV/uJFNvSdVu5+qWpzPp2M4e3aci/z+5Bp2ZqB5HKLd5EkmFmDYEniHpybSF6ur1IZladKIm85O5vheLVZpbm7itD1VV2KM/ihwmqVSjL4vuqsILysaG8VSHbi5Sq3Lx8nv1yCf8aNY/a1aty35BenNGjBVWqqBpLJN6xtq4Oi4+a2YdE7RLfFLVf6IX1FDDH3e+OWfUecDHwz/D+bkz5tWY2gqixfVNINqOAv5tZo7DdAOBmd19vZpvNrB9RldlFwAPxXJNIvCYsWsef/zObOSs3c1znJvzjrO60anRAssMSKTfibWz/KfCpu29y9yWhEfxMd3+niF2PAS4EZpjZ9FD2B6IE8pqZXQYsBc4J60YSdf3NJOr+ewlASBh/BSaH7f5S0PAOXM333X8/QA3tUkqWrtvKPaPn8870b0lrUEuN6SJ7YfE8cmFm09291x5l09z98IRFliDp6emekZGR7DCkHNu2K5d7P17AM18sxjAuO64915/cmVrVNbmUVF5mNsXd0wtbF28bSWGTI6iLilQ4Y+Zm8+f/zGLJum2cfUQrbhxwCAc1qFX0jiKV2P40tt9NNAIwwDVEje4iFcLqzTu4Z/R8RkxeTqtGtXn58r4c3alJssMSSQnxJpJfAX8iGtYEYDRRMhFJaXn5zhOfL+Ke0fPJzXeGHd+B351yCNU1Q6FI3OLttbUVGB6GOXF335LYsEQS76uF6/jHB3P4ZsUmftS1GX86vRttD6yT7LBEUk68vba6Ew2H0jh8Xgtc7O4zExibSEKs27KTez6ez4sTltGyYW3u/HlPfta7pXpjiRRTvFVbjwG/cfcxAGbWn+hJ8aMTFJdIqdu2K5cnPlvMU+MXkbMzl0uOacfvT+lC7RrqjSVSEvEmkjoFSQTA3ceameoAJCW4O58vWMst78xk2fpt/Khrc24aeIiGeBcpJfEmkkVm9iegYLrdC4BFiQlJpPTMW5XD396fzecL1tKm8QG8fEVfju6o3lgipSneRHIp8GegYKysz0KZSLmUnbODf304jzenrqB+reoMP7ULlxzTjprVVI0lUtriTSTt3f26hEYiUgrcnXemZ3Hru7PYuTufy49tz1X9O9G4To1khyZSYcWbSO4ys4OAN4BX1VtLyqNVm3bwuze+5vMFa+ndpiH/OrsnnZrVTXZYIhVevM+RnBgSyTnAY2FOklfd/W8JjU4kDrvz8nnhq6Xc+/F8dubmc8tpXbnkmPZU1RDvImUi7vGy3H0VcL+ZjQF+D9wKKJFIUs3+djM3vv41s1dG86X//afd6dBUdyEiZSneBxK7AucCPwPWEQ2V8tsExiWyTzk7dnPnqHm8NHEZ9WtX59ELejPwsLRkhyVSKcV7R/I0MAI4xd2/TWA8IkV6e9oK/u/9uazdspOhfVrzu1O6qDFdJInibSM5KtGBiBQlO2cHv3/jG8bOW0PP1g158uJ0erVumOywRCo9zSki5V5uXj4vTFjKHR/OJd/hD4O6cNmxHdSYLlJOKJFIufbZ/DX8feQc5q7K4bjOTbjtjEPVpVeknNnnpAtm9kJ4v75swhGJ7Nidx63vzuSipyexbVce9w3pxfOX9lESESmHirojOcLMWgCXmtnzwA/qEtx9fcIik0pr/IK13PruTBat3cpFR7XlD4O6ar50kXKsqETyKPAJ0IFoat3YROKhXKRU5OU7j45byJ0fzaN1owN47tI+nHBw02SHJSJF2GfVlrvf7+5dgafdvYO7t495FZlEzOxpM8s2s5kxZbebWZaZTQ+vQTHrbjazTDObZ2anxJQPDGWZZjY8pry9mU0M5a+amfqApqiJi9ZxxgPj+feoeQzo1pxRNxyvJCKSIuKamNrdrzKznmZ2bXj1iPP4zwIDCym/x917hddIADPrBgwBDg37PGxmVc2sKvAQcCrQDRgatgW4IxyrE7ABuCzOuKSc2J2XzwOfLGDoExPYsG0XD553OI9ecIQmmxJJIXElEjO7DngJaBZeL5nZr4raz90/A+JtRxkMjHD3ne6+GMgE+oRXprsvcvddRA9GDrZoXtSTiAaSBHgOODPOc0k5kJmdw08e/IK7Rs/n1MPS+PCG4zm9RwtNeSuSYuLt/ns50NfdtwKY2R3AV8ADxTzvtWZ2EZAB/NbdNwAtgQkx26wIZQDL9yjvCxwIbHT33EK2/wEzGwYMA2jTpk0xQ5bSkpuXz5PjF3P3R/M5oGZVHjm/N6d21/AmIqkqrjsSokb2vJjPeezRg2s/PAJ0BHoBK4G7inmcuLn74+6e7u7pTZuq3j2ZZqzYxJkPf8E/P5jL8Qc35aMbjlcSEUlx8d6RPANMNLO3w+czgaeKc0J3X12wbGZPAP8NH7OA1jGbtgpl7KV8HdDQzKqFu5LY7aWccXdenrSMW9+dReM6NXhg6OGc3iNN1VgiFUC8Y23dbWZjgWND0SXuPq04JzSzNHdfGT7+FCjo0fUe8LKZ3Q20ADoDk4jufDqbWXuiRDEEOM/dPQxpfzZRu8nFwLvFiUkSa9GaLdz67izGZ67luM5NePC83jSoXT3ZYYlIKdmf+UimAlP35+Bm9grQH2hiZiuA24D+ZtaL6DmUJcCV4fizzOw1YDaQC1zj7nnhONcCo4CqRF2RZ4VT3ASMMLO/AdMo5l2SJEZuXj73f5rJo+MWUrNaFf78k0O5sF9bqmiMLJEKxdw92TGUqfT0dM/IyEh2GBXe6s07+O1rXzM+cy0DujXnr2ceRvP6tZIdlogUk5lNcff0wtZp0EYpdV8uXMt1r0wjZ0cud/ysO+ceqZ5yIhVZ3InEzNoCnd39YzOrDVRz95zEhSapxt157LNF3DlqHm0aH8BLl/fjkIPqJTssEUmweKfavYLoOYzGRF13WxGNw3Vy4kKTVBI76dQphzbnX2f3VIO6SCUR7x3JNURPmE8EcPcFZtYsYVFJShk7L5sbX/+azTty+eOgrlx2bHs1qItUIvEmkp3uvqugz7+ZVSPqdSWV2KZtu/nr+7N5Y8oKuhxUjxcu60vXtPrJDktEyli8iWScmf0BqG1mPwauBv6TuLCkvBszN5ub3vyGdVt38csTOnL9yZ010KJIJRVvIhlONLLuDKLnPka6+xMJi0rKrbVbdnLzWzMYPXs1nZrV5amLj6R7qwbJDktEkijeRHI+0ci83yUPMzvd3f+7j32kgvly4Vp++9rXrN+6i9+dcgiXHdteMxeKSNyDNj4AfG5mXWPK/pKAeKQccneeGr+YC56cSK3qVXnzqqO55sROSiIiAsR/R7KYqGrrDTO73d1fp/ij/0oK+Xbjdm55Zyafzs3m5C7NuG/o4dStqedYReR78f5GcHefamYnAK+YWV+ica+kApu+fCO/fGEKm3fs5qaBXbjy+A7q1isi/yPeqq2VAO6+FjiFqOvvYYkKSpLL3XktYznnPvYVVasYb151NFf176gkIiKFincY+dNilvOB34WXVDAbt+3iuhHT+Wz+Go5s14hHLjiCJnVrJjssESnH9plIzOxed7/BzP5DIQ8guvtPEhaZlLkJi9Zx4+tfk715J7ed0Y0L+7WlWtV4b1pFpLIq6o7khfB+Z6IDkeQaMWkZt7wzk7SGtXjx8r70ad842SGJSIrYZyJx9ynhfRyAmVUnahvJcvfsxIcnibYzN487Rx71KqgAABRySURBVM3jic8Xc2ynJjx8QW/q19JgiyISv33WW5jZo2Z2aFhuAHwNPA9MM7OhZRCfJNDaLTsZ+vgEnvh8Mef3bcNTv0hXEhGR/VZU1dZx7v7LsHwJMN/dzzSzg4APgFcSGp0kzKxvN3HFcxms3bqLB887nNN7tEh2SCKSoopKJLtiln8MvA7g7qsKRgKW1DN+wVqGvZBBg9rVefOXR2usLBEpkaISyUYzOx3IAo4herq9YBj52gmOTUqZu/Pw2IXc9dE8Ojaty3OX9qFFQ32NIlIyRSWSK4H7gYOAG9x9VSg/GXg/kYFJ6dqxO49b3pnJG1NWcFqPNP5xVne1h4hIqdhnY7u7z3f3ge7ey92fjSkf5e6/LergZva0mWWb2cyYssZmNtrMFoT3RqHczOx+M8s0s2/MrHfMPheH7ReY2cUx5UeY2Yywz/2m+rZCbdq+m6FPTOCNKSu49sROPDj0cCURESk1iX7a7Flg4B5lw4FP3L0z8En4DHAq0Dm8hgGPQJR4gNuAvkTT/d5WkHzCNlfE7LfnuSq95eu38fNHv2Rm1iYePO9wbjzlEJRvRaQ0JTSRuPtnwPo9igcDz4Xl54AzY8qf98gEoKGZpRGN7TXa3de7+wZgNDAwrKvv7hPc3Ym6JZ+JfGfasg389OEvWblpB8/8oo96ZolIQiRj/Ivm7r4yLK8CmofllsDymO1WhLJ9la8opPx/mNkwM8sws4w1a9aU/ApSwIczV3HOY19Rq3oV3rrqaI7t3CTZIYlIBbXficTMSm1WxHAn8T9jeJU2d3/c3dPdPb1p06aJPl1SuTv3fjyfX744hUMOqse71xxD5+b1kh2WiFRgxbkjKfSv/v2wOlRLEd4LhlrJAlrHbNcqlO2rvFUh5ZVWfr5zyzszuffjBZx1eEve+OXRHKiRe0UkwYqTSKaV8JzvAQU9ry4G3o0pvyj03uoHbApVYKOAAWbWKDSyDwBGhXWbzaxf6K11UcyxKh1350/vzuSlicsYdnwH7vx5T02FKyJlYr/nTHX3S+Pd1sxeAfoDTcxsBVHvq38Cr5nZZcBS4Jyw+UhgEJAJbCMakgV3X29mfwUmh+3+4u4FDfhXE/UMq000ZMsH+3s9FYG78/eRc3hp4jKuPKEDwwd2Uc8sESkzFjVTVB7p6emekZGR7DBKTX6+89f3Z/PMF0s4v28b/nbmYUoiIlLqzGyKu6cXtm6/70ik/MgLbSKvTFrGL45ux21ndFMSEZEyp0SSonLz8vnj2zN5NWM5V/XvyO/1oKGIJElRU+1eFOdxprv7N6UQj8Rhx+48fvPadEbOWMXV/Tvy+4Fdkh2SiFRiRd2RtI/zOEtKGIfEKWfHbi56ehLTlm3kpoFduKp/x2SHJCKVXFGJ5O/uvrtMIpEirduyk4ufmcSclTk8fH5vBnVPS3ZIIiJFPkeSZWZPmtnJGlk3uRav3cpZj3zJgtVbePzCI5RERKTcKCqRdCV6fuMWYLmZ3RceFpQylJm9hSGPf0XOjlxevqIvJ3dtXvROIiJlpKj5SNa5+2PufiLREO6LgHvMbKGZ/V+ZRFjJLV+/jUufnUxevvPKFf04om3jZIckIvIDcQ+R4u7fAk8RzQGSA1yeqKAksnbLTs597Cs2bNvFExelc8hBGnxRRMqfIhOJmdUys5+b2VtEw5ecRDQZlSa3SKBN23Zz2XMZrNu6ixcv68vhbRoVvZOISBIU9RzJy8CPgHHAS8B57r6jLAKrzDbv2M0FT01k7qrNPHReb3q2bpjskERE9qqo7r8fAle6e05ZBCPRw4bDns9g7qrNPHbhEZzURQ3rIlK+FZVIxgIFw7fvy0Z331w6IVVeu3LzueL5DCYuXs895/RSEhGRlFBUInmuiPUQzXD4LNGc6VJM7s5Nb37D5wvWcsfPunPm4SWdP0xEpGzsM5GEbr+SYO7OXR/N5+1pWfzmxwdz7pFtkh2SiEjcijNDopSyR8Yt5MExmZyT3opfndQp2eGIiOwXJZIkGzVrFf8eNY/TeqRxx896aCh4EUk5SiRJlJm9hV+/Op0erRpy59k9lUREJCUpkSTJhq27uOy5ydSsVoVHL+hN7RpVkx2SiEixaIbEJNiZm8eVL05h5cYdvHxFX9Ia1E52SCIixaZEkgT/GDmXSYvXc9+QXqS30yCMIpLaVLVVxsbNX8OzXy7hgn5tGNxLz4qISOpLWiIxsyVmNsPMpptZRihrbGajzWxBeG8Uys3M7jezTDP7xsx6xxzn4rD9AjO7OFnXE4/M7Byue2UahzSvx/BTuyY7HBGRUpHsO5IT3b2Xu6eHz8OBT9y9M/BJ+AxwKtA5vIYRDWWPmTUGbgP6Es2Xclscw7kkxcZtu7jsuQyqV63C4xcdQd2aqlUUkYoh2YlkT4P5fliW54AzY8qf98gEoKGZpQGnAKPdfb27bwBGAwPLOuii5Oc7v3nta7I2bOfh83vT9sA6yQ5JRKTUJDOROPCRmU0xs2GhrLm7rwzLq4CCUQtbAstj9l0RyvZW/gNmNszMMswsY82aNaV5DXF59sslfDo3mz+e1pU+7dW4LiIVSzLrV4519ywzawaMNrO5sSvd3c3MS+NE7v448DhAenp6qRwzXovWbOGfH87l5C7N+MXR7cry1CIiZSJpdyTunhXes4G3ido4VocqK8J7dtg8C2gds3urULa38nIhP9/507szqVG1Cv84q7ueXBeRCikpicTM6phZvYJlYAAwE3gPKOh5dTHwblh+D7go9N7qB2wKVWCjgAFmVjBnyoBQVi68MnkZX2Su4+ZBXWhWv1aywxERSYhkVW01B94Of6FXA1529w/NbDLwmpldBiwFzgnbjwQGEc0Zvw24BMDd15vZX4HJYbu/uPv6sruMvVu6biv/HDmXvu0bc14fDQsvIhVXUhKJuy8CehZSvg44uZByB67Zy7GeBp4u7RhLIj/fuX7EdDC48+cajFFEKjY9zJAAL05cyvTlG7n33F60bnxAssMREUmo8vYcScpbv3UXd46ax9EdD2RwrxbJDkdEJOGUSErZP0bOYfvuPG7/yaGq0hKRSkGJpBSNmZfN61NWcOmx7Tm4eb1khyMiUiaUSErJ9l153PL2TDo3q8uvf3RwssMRESkzamwvJY99tpCsjdsZMawftaprtkMRqTx0R1IKlq/fxqPjFjKo+0H063BgssMRESlTSiSl4O8j5wBwy2ndkhyJiEjZUyIpoclL1vPBzFVc3b8TLRpq7nURqXyUSEro7o/m06RuDa44rkOyQxERSQolkhL4fMEavlq0jqv6d6J2DTWwi0jlpERSTO7OXR/Np2XD2pzfV4MyikjlpURSTF9krmP68o1cc2IndfcVkUpNiaSYnvliMU3q1uRnR/zPzL4iIpWKEkkxLFidw6fzsjmvT2tqVtPdiIhUbkokxfDSxGVUq2L84pj2yQ5FRCTplEj207Zdubw5ZQWndU+jcZ0ayQ5HRCTplEj203vTvyVnZy4X9Gub7FBERMoFJZL99Na0LDo1q8sRbRslOxQRkXJBiWQ/rNq0g8lL1nNa9zRNWiUiEiiR7Id3p2fhDmf1VpdfEZECFSKRmNlAM5tnZplmNjxR5/lkTjbd0urT9sA6iTqFiEjKSflEYmZVgYeAU4FuwFAzK/Xx3LftymXqsg2ccEjT0j60iEhKS/lEAvQBMt19kbvvAkYAg0v7JNOWbSQ33+nbvnFpH1pEJKVVhETSElge83lFKPuOmQ0zswwzy1izZk2xTlKjWhV+1LUZPVs1LH6kIiIVUKWYs93dHwceB0hPT/fiHOPIdo05sp3uRkRE9lQR7kiygNYxn1uFMhERKQMVIZFMBjqbWXszqwEMAd5LckwiIpVGyldtuXuumV0LjAKqAk+7+6wkhyUiUmmkfCIBcPeRwMhkxyEiUhlVhKotERFJIiUSEREpESUSEREpESUSEREpEXMv1vN5KcvM1gBLi7l7E2BtKYaTTBXlWnQd5Yuuo/wprWtp6+6FDjZY6RJJSZhZhrunJzuO0lBRrkXXUb7oOsqfsrgWVW2JiEiJKJGIiEiJKJHsn8eTHUApqijXousoX3Qd5U/Cr0VtJCIiUiK6IxERkRJRIhERkRJRIomTmQ00s3lmlmlmw5MdTwEzW2JmM8xsupllhLLGZjbazBaE90ah3Mzs/nAN35hZ75jjXBy2X2BmF8eUHxGOnxn2tVKK+2kzyzazmTFlCY97b+co5eu43cyywncy3cwGxay7OcQ0z8xOiSkv9OcrTI8wMZS/GqZKwMxqhs+ZYX27El5HazMbY2azzWyWmV0fylPqO9nHdaTid1LLzCaZ2dfhWv5c3POX1jXulbvrVcSLaHj6hUAHoAbwNdAt2XGF2JYATfYo+xcwPCwPB+4Iy4OADwAD+gETQ3ljYFF4bxSWG4V1k8K2FvY9tZTiPh7oDcwsy7j3do5Svo7bgRsL2bZb+NmpCbQPP1NV9/XzBbwGDAnLjwJXheWrgUfD8hDg1RJeRxrQOyzXA+aHeFPqO9nHdaTid2JA3bBcHZgY/v326/yleY17jbU0filU9BdwFDAq5vPNwM3JjivEsoT/TSTzgLSwnAbMC8uPAUP33A4YCjwWU/5YKEsD5saU/2C7Uoi9HT/8BZzwuPd2jlK+jtsp/JfWD35uiObQOWpvP1/hF8laoNqeP4cF+4blamE7K8Xv5l3gx6n6nRRyHSn9nQAHAFOBvvt7/tK8xr29VLUVn5bA8pjPK0JZeeDAR2Y2xcyGhbLm7r4yLK8CmoflvV3HvspXFFKeKGUR997OUdquDVU+T8dU1ezvdRwIbHT33D3Kf3CssH5T2L7EQpXI4UR/Aafsd7LHdUAKfidmVtXMpgPZwGiiO4j9PX9pXmOhlEhS37Hu3hs4FbjGzI6PXenRnxQp18e7LOJO4DkeAToCvYCVwF0JOEdCmFld4E3gBnffHLsulb6TQq4jJb8Td89z915AK6AP0CXJIRVKiSQ+WUDrmM+tQlnSuXtWeM8G3ib6YVttZmkA4T07bL6369hXeatCyhOlLOLe2zlKjbuvDr8A8oEniL6T4lzHOqChmVXbo/wHxwrrG4Tti83MqhP98n3J3d8KxSn3nRR2Han6nRRw943AGKJqpv09f2leY6GUSOIzGegcejLUIGrIei/JMWFmdcysXsEyMACYSRRbQW+Zi4nqiQnlF4UeN/2ATaFKYRQwwMwahVv+AUR1oiuBzWbWL/SwuSjmWIlQFnHv7RylpuCXYvBTou+k4NxDQu+a9kBnogboQn++wl/nY4CzC4k39jrOBj4N2xc3ZgOeAua4+90xq1LqO9nbdaTod9LUzBqG5dpEbT1zinH+0rzGwpVWo1ZFfxH1UplPVEf5x2THE2LqQNTT4mtgVkFcRHWcnwALgI+BxqHcgIfCNcwA0mOOdSmQGV6XxJSnE/2nWwg8SOk1Hr5CVMWwm6gO9rKyiHtv5yjl63ghxPlN+E+cFrP9H0NM84jpAbe3n6/wHU8K1/c6UDOU1wqfM8P6DiW8jmOJqpS+AaaH16BU+072cR2p+J30AKaFmGcCtxb3/KV1jXt7aYgUEREpEVVtiYhIiSiRiIhIiSiRiIhIiSiRiIhIiSiRiIhIiSiRSMoyMzezu2I+32hmt+/nMZ61aFTYmuFzEzNbUkrx9Tez/5bGsYo4T9MwUus0Mztuj3U3mNkBcRxjS+IilIpOiURS2U7gLDNrUsLj5BE9+1CumFnVODc9GZjh7oe7++d7rLuBaMA/kYRRIpFUlks0H/WvS3ice4FfxwwJAfzvHYWZPWhmvwjLS8zsHxbmgTGz3mY2yswWmtkvYw5T38zet2jOh0fNrErYf4CZfWVmU83s9TA2VMFx7zCzqcDP94innZl9atHAg5+YWRsz60U0DPvgEEvtmO2vA1oAY8xsTCgbatGcIDPN7I49/yHCHdlXZnZa+Pw7M5sczvnnmDjmmNkTFs2T8VHBec3sOovmAvnGzEYU7+uQVKNEIqnuIeB8M2tQgmMsA8YDF+7vfh4NqPc58CzRkBL9gD/HbNMH+BXRnBAd+f4O6hbgRx4NuJkB/CZmn3Xu3tvd9/xF/ADwnLv3AF4C7nf36cCtRHNP9HL37QUbu/v9wLfAie5+opm1AO4ATiIavPBIMzuzYHszaw68T/QE9ftmNoBoOI0+Yfsj7PtBQTsDD7n7ocBG4GehfDhweIgxNqFKBVat6E1Eyi9332xmzwPXAduL2n4f/kE0ntD7+7FPwXhrM4gmIMoBcsxsZ8EYScAkd18EYGavEA3hsYMosXwRDQ1FDeCrmOO+upfzHQWcFZZfILoT2R9HAmPdfU2I5yWiibneIZo46RPgGncfF7YfEF7Twue6RAlkGbA4JDGAKURzskA0nMdLZvZOOK5UAkokUhHcSzTpzzOFrTSzUURzXGS4++WFbePuCyya9+GcmOJcfnjXXmuP3XaG9/yY5YLPBf+39hyDyInGqRrt7kMLvRrYupfyRMolSginAAWJxIB/uPtjsRtaNM9H7PXmAQVVaqcRJaczgD+aWXf/fl4LqaBUtSUpz93XE00Netle1p8Sqn0KTSIx/g+4MebzUqBbGDW1IVGj9v7qE0ZXrQKcS1SFNgE4xsw6wXejOB8cx7G+JBqhFeB8oiq1ouQQTTkL0SB8J4R2kKpEsxQWJA0n6nDQxcxuCmWjgEtj2m9amlmzvZ0oXGNrdx8D3EQ0jHndOGKUFKc7Eqko7gKuLckB3H1WaOTuHT4vN7PXiEZeXcz3VTz7YzLRSLediIbmftvd80Oj/SsF3Y6J2kzmF3GsXwHPmNnvgDXAJXGc/3HgQzP7NrSTDA9xGPC+u383PLi755nZUOA9M8tx94fNrCvwVaiC2wJcQHQHUpiqwIuhvcqI2nA2xhGjpDiN/isiIiWiqi0RESkRJRIRESkRJRIRESkRJRIRESkRJRIRESkRJRIRESkRJRIRESmR/wdnK1o3o/TkYAAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plt.plot(N, V)\n",
"plt.xlabel('N - Number of tokens')\n",
"plt.ylabel('|V| - Size of vocabulary')\n",
"plt.title('Heaps\\' Law')"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1000 610\n",
"10000 3597\n",
"20000 5920\n",
"30000 7753\n",
"50000 10806\n",
"100000 16675\n"
]
}
],
"source": [
"for i in [1000, 10000, 20000, 30000, 50000, 100000]:\n",
" print(i, V[i-1])"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"from math import log"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1.3188109468386284\n",
"0.6824809994294719\n",
"1.3976147883124628\n",
"0.655343979207257\n",
"1.5589214096765935\n",
"0.6405478197637083\n"
]
}
],
"source": [
"print(8385/6358)\n",
"print(log(1.3188, 3/2))\n",
"print(11719/8385)\n",
"print(log(1.397614, 5/3))\n",
"print(18269/11719)\n",
"print(log(1.558921, 10/5))"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"10.340940789558791"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"11719 / 50000 ** 0.65"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0.5, 1.0, \"Heaps' Law\")"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZIAAAEWCAYAAABMoxE0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3dd3hVVdbA4d9Ko/cmHUQQsYAaimJvCBbsYsWKY++KM/qBozPqjH1QEBRFpYoNlSIgTZQSeodQE1qAFBICqev74+yMVyaQG+69uSnrfZ775Jx92jremMXZe5+9RVUxxhhjjlVEuAMwxhhTtlkiMcYYExBLJMYYYwJiicQYY0xALJEYY4wJiCUSY4wxAbFEYowxJiCWSIwBRGSLiFxyWNldIvJruGJyMRzxRa/CYjYmHCyRGGOMCYglEmP8JCJNRORrEdkjIptF5DGfbV1E5HcRSRWRnSIySERifLariDwmIptEZK+I/FtEIty2E0RkloikuW1jA4yzjoj86OJMccvN3LYLRWSFz75TRWShz/ocEbkmkOubiscSiTF+cH/0fwCWAU2Bi4EnRKSH2yUPeBKoD5zltj902GmuBWKBM4DewD2u/BXgZ6AO0Az4T8EBqirHEG4E8CnQEmgBHAQGuW3zgLYiUl9EooHTgCYiUkNEqrj45hzDNU0FFhXuAIwpRb4TkVyf9RhgsVvuDDRQ1b+79U0iMgzoA0xR1UU+x20RkY+A84F3fcrfUNVkIFlE3gVuAT4GcvD+6DdR1UQgoHYZVd0HfF2wLiL/AGa4bQfdE8h5wA68xJgKdAeygA3ueGP8Zk8kxvzhGlWtXfDhz08ULfH+5Z5a8AH+CjQCEJF2rgppl4jsB/6J93TiK8FneSvQxC0/BwiwQERWicg9BEBEqorIRyKy1cUyG6gtIpFul1nABXjJZBYwEy/pne/WjSkWSyTG+CcB2OybaFS1hqr2ctsHA2uBtqpaEy/JHF4t1dxnuQXeEwGquktV71fVJsADwIcickIAsT4NnAh0dbGc58oL4jk8kczCEokJgCUSY/yzAEgXkedFpIqIRIrIKSLS2W2vAewHMkSkPfBgIed41jWENwceB8YCiMiNBY3hQAqgQL6fcUWLSGWfT5SL5SCQKiJ1gQGHHfMbXqLpAixQ1VV4T1xd8Z5ejCkWSyTG+EFV84ArgU7AZmAvXvtGLbfLM8CtQDowDJckDvM9sAhYCvwEfOLKOwPzRSQDmAA8rqqb/AxtIl7SKPgMxGuXqeJinAdMPuxeDuC1/axS1WxX/DuwVVWT/LyuMf8lNrGVMaHnXixsq6rx4Y7FmGCzJxJjjDEBsURijDEmIFa1ZYwxJiD2RGKMMSYgFe7N9vr162urVq3CHYYxxpQpixYt2quqDQrbVuESSatWrYiLiwt3GMYYU6aIyNYjbbOqLWOMMQGxRGKMMSYglkiMMcYExBKJMcaYgFgiMcYYExBLJMYYYwJiicQYY0xALJEYY0x5l5kMc9+DLQHN4nxEFe6FRGOMqTB2LIWFw2DFeMg9BN2fgFbnBP0ylkiMMaY8yc2C1d/DgmGQuACiq0LHW6DL/dDo5JBc0hKJMcaUB2mJEPcpLB4BB/ZA3TZw+eteEqlSO6SXtkRijDFllSpsmQMLhsLaiaD50O5y7+nj+AshIoJt+zKZvHAjk1bu4qrTmnDPOa2DHoYlEmOMKWuy0mHZGFj4MexZC1XqwNmPQOw9UKcViSmZjJsez7TVu1m9cz8ApzStSd1qMSEJxxKJMcaUFXvWeclj6WjITofGnaD3h3DKdWRqNFNX7+b77xcyY10SApzZsg4v9GxPz1Ma06Je1ZCFZYnEGGNKs7xcWD/Zq77aPAsiY+Dka6FLPw7U78ic+H38MG4NM9clcSA7j/rVY3jkwhPo06UFTWtXKZEQLZEYY0xplJkMS77wel+lJUDNpnDRS2R3vINFe6P4ceEOvl/6CxlZudSrFsNVHZtwxWmNObtNfSIjpERDtURijDGlye7VsOAjWDYWcg9Cy3Ogxz/YXP8Cvl++m1GDVpCUnkVMVARXnNqYG85sRpfWdYmODN/75ZZIjDEm3PLzYP0UmD8YNs+GqMpw6o3kxN7PpL0NGPXrVuZt8t5KP69dA165pgVntalHzcrRYQ7cY4nEGGPC5WAqLPnSa/9I3epVX108gPjm1zNqxQG+G76d5AOJtKxXlWcua8e1ZzQrsXaP4rBEYowxJW3POpj/ESwbDTmZ0OJsci9+man5sXw6L5EFP60kQuDSDo244czmXNS+YYm3exSHJRJjjCkJ+fkQPxXmDYZNMyCyEpx6I6mn3s3YxDqMnZLApr3LaVijEi9ecRJXd2pCwxqVwx21XyyRGGNMKB1Kg6WjvCeQlM1QozF64Yv8Ur0XX689xLThSWTn7aZj89p8eNsZXNqhUVgbzo+FJRJjjAmFvRu8to+loyA7A5p3ZUfss4xL78S3C5LYum8L9avHcGvXFtzatQXtGtUId8THzBKJMcYES34+bPzF630VPw0iY8g/+Trm1b+BD9fX5Ncf9iKyhc4t6/LYRW3p3akJUWXs6aMwlkiMMSZQ2ZmwfIzX/rF3PVRvRGKnJxmWeT7fLs9m/6FsjquZwXOXn8hNsc2pX71SuCMOqpAmEhGpDMwGKrlrjVfVASLSGhgD1AMWAXeoaraIVAI+B84E9gE3q+oWd64XgHuBPOAxVZ3iyi8H3gMigY9V9fVQ3pMxxvxX+i7vzfO44XAwGT2uI4vPfJ23d5zM3Hnp1KyczUXtG3JVxyac365BuXj6KEyon0iygItUNUNEooFfRWQS8BTwjqqOEZEheAlisPuZoqoniEgf4A3gZhHpAPQBTgaaANNEpJ27xgfApUAisFBEJqjq6hDflzGmItu5DH7/EFZ+Dfm5HGxzOV/HXM276xuwd0s2TWvn8uIVJ3Fb15ZUiYkMd7QhF9JEoqoKZLjVaPdR4CLgVlc+AhiIl0h6u2WA8cAgERFXPkZVs4DNIhIPdHH7xavqJgARGeP2tURijAmu/Hxv8MR5H8KWOWh0Nba2vplh2ZcxZk00qsoFJ9bm1i4tuKh9QyJK8XsfwRbyNhIRicSrvjoB7+lhI5Cqqrlul0SgqVtuCiQAqGquiKThVX81Beb5nNb3mITDyrsWEkM/oB9AixYtAr8pY0zFkZXhuu8OhuRN5FZvwqzmjzIg4UwSV8VQr1oM953TjNu7taR53dAN1V6ahTyRqGoe0ElEagPfAu1Dfc1CYhgKDAWIjY3Vkr6+MaYMSkv03v1YPAIOpZFc5zRG1OrPoN0nI8lRXNqhEa92bs65bRuU6rfOS0KJ9dpS1VQRmQGcBdQWkSj3VNIM2O522w40BxJFJAqohdfoXlBewPeYI5UbY0zxJS6CeR+gq74DlMXVzuXfeRczb+cJtKxXlScubcZ1Z5bOMa/CJdS9thoAOS6JVMFrFH8DmAHcgNdzqy/wvTtkglv/3W3/RVVVRCYAo0TkbbzG9rbAAkCAtq4X2Ha8BvmCthdjjPFPXi6s/dFr/0iYT3ZUdX6IuYp39l9IZkRTrjizMY+dehzdWterUG0f/gr1E0ljYIRrJ4kAxqnqjyKyGhgjIq8CS4BP3P6fAF+4xvRkvMSAqq4SkXF4jei5wMOuygwReQSYgtf9d7iqrgrxPRljyousdFj8hdf+kbqN5JgmDOUuvsw4h9ZNG/PAhc254YxmFaLnVSDE61hVccTGxmpcXFy4wzDGhNP+nbDgIzRuOHIojbUxHXgn4zJmSRcuPaUJd53dkjNa1MHrNGoARGSRqsYWts3ebDfGVBxJa+C3QejysWh+HrMiuvJeVk92VzqFu3q04o3OzaldNSbcUZY5lkiMMeWbKmyZQ96v7xG5cRpZUokxORcwPK8XLduewn2xzeh5SuMK3/MqEJZIjDHlU14uuvo7Mme8Q7XklaRSi09zbuSXGlfS69yTGXNmMxrXsp5XwWCJxBhTruih/eyZ/QkxcUOonb2LXfmNGUE/Dp10A1fHtuGpNtbzKtgskRhjyoW0pG1smPAWJyZ+RUMOsDC/Pb8f9xDHdb6G509rSrVK9ucuVOy/rDGmTNu0Oo606W9zyt7JnE4+i6qew77T+tH53B50LmfDtZdWlkiMMWWO5uezdv5ksma9Q6dDCzioMcytdSVNej1Nl/Ydwx1ehWOJxBhTZmzds5/VM8bQeu1QTsrfwD5qMa/Vg7S74nEuaNA43OFVWJZIjDGl3pbdycz7bjBddnxBT9nJrsjGzOvwIh2vfIhuVauFO7wKzxKJMabUStiZxMof3uP07SPpIynsqHoi+y58meNib+C4CBu2pLSwRGKMKXVWb9hI4uS36br3W3rKATbWOJOUHs/T5JTLwIYtKXUskRhjSgVVZeGSJaROf4fzMibRnlzW1jmfnJ7P0+bEs8MdnjkKSyTGmLA6lJPHrNkzqLzgP3Q/NBuVCDY0voIWV/WnQ9MO4Q7P+MESiTEmLFIysvjl5+9otGIIPXQxmVRmw/F30OrKZ+lQr3nRJzClhiUSY0yJStqfydyfRtJ63VCuZz37I2qz5bSnaNnjMU6qWifc4ZljYInEGFMi1u9M4ddvh3DOri+4NmI7e6KOY2fXV2h8/n3UjKka7vBMACyRGGNCJj9fidu4kzWTh3LBnpHcE5FEUrU2JJ0ziIbdboFI+xNUHvj1LYrIo8CXqpoS4niMMeWAqjJ+3np2zhjKjVnf0EWS2VXzJNIu/jcNO14NERHhDtEEkb//HGgELBSRxcBwYIpWtDl6jTFFys9XpizewPZpg7jm4LfUl/3sqXcmhy4dynHtL7F3QMopvxKJqr4oIi8BlwF3A4NEZBzwiapuDGWAxpjSLzM7l5/mryZz9iCuyf6BWpLJ7obdye/1Nxq07h7u8EyI+f186Z5AdrlPLlAHGC8i/ypsfxFpLiIzRGS1iKwSkcdd+UAR2S4iS92nl88xL4hIvIisE5EePuWXu7J4EenvU95aROa78rEiYpMtG1OCMrNz+XTKPMa/dg89p11K35yxHGxyFnn3zaDRwxOJsCRSIfjbRvI4cCewF/gYeFZVc0QkAtgAPFfIYbnA06q6WERqAItEZKrb9o6qvnnYNToAfYCTgSbANBFp5zZ/AFwKJOJVsU1Q1dXAG+5cY0RkCHAvMNjfmzfGHJu9GVn8NGcBleYP4hadTozkkdz6Cqr1fIHjGp0c7vBMCfO3jaQOcJ2qbvUtVNV8EbmysANUdSew0y2ni8gaoOlRrtEbGKOqWcBmEYkHurht8aq6CUBExgC93fkuAm51+4wABmKJxJiQyczO5aufZ1N1wfvcIrOJEEhtdz31L+9P/Xptwh2eCZMiE4mIRAJ9VHVgYdtVdY0f52gFnA7MB7oDj4jInUAc3lNLCl6SmedzWCJ/JJ6Ew8q7AvWAVFXNLWR/Y0wQ7c3IYsLUGTRePojb838lLzKazJPvoPYlz1C/tr2FXtEVmUhUNc+1T7RQ1W3FvYCIVAe+Bp5Q1f0iMhh4BVD38y3gnuKet5gx9AP6AbRo0SKUlzKmXIlPSufbKb/QYcMQ7pLfyY6oRNIp99H48meJqdEo3OGZUqI4VVurRGQBcKCgUFWvPtpBIhKNl0RGquo37pjdPtuHAT+61e2A7z9tmrkyjlC+D6gtIlHuqcR3/z9R1aHAUIDY2FjrtmzMUagqi7elMmn6L3TcPJSnI+aTE1WZ1I4PUfeSp2lcrV64QzSljL+J5KXinlhEBPgEWKOqb/uUN3btJwDXAivd8gRglIi8jdfY3hZYAAjQVkRa4yWKPsCtqqoiMgO4ARgD9AW+L26cxhhPfr4yf3MyE36eSvcdw/lrxAJyo6twqPNjVD3vcSpZAjFH4O97JLOO4dzdgTuAFSKy1JX9FbhFRDrhVW1tAR5w11jl3k1Zjdfj62FVzQMQkUeAKUAkMFxVV7nzPQ+MEZFXgSV4icsYUwyHcvIYF5fA1Bkz6HNwFK9FLiA7phq5XZ8k5pxHialaN9whmlJO/HlBXUS6Af8BTgJi8P6gH1DVmqENL/hiY2M1Li4u3GEYE3ZZuXmMnr+NaTOmc1vWGHpGLiQnqjp0fYDo7o+AJRDjQ0QWqWpsYdv8rdoahFel9BUQi/dOSbujHmGMKZWycvP4etF2pv7yM30yR/NlZBy5lavD2c8T3e1BqGJDuZvi8XvoTVWNF5FIV930qYgsAV4IXWjGmGDKycvn+6U7mDx1MjcdGMWnkYvIqVwDPft5oiyBmAD4m0gy3fAjS92QKDspxvAqxpjwSTuYw+e/bWHBvFnceWgUH0cuIqdyTbT7C0R3/QtUqR3uEE0Z528iuQOvXeQR4Em87rjXhyooY0zgsnLz+GDGRubMncN9eWN4NHIBOZVrkH/2C0Sf9SBUrhXuEE054W+vrYKhUQ4CL4cuHGNMoDKychk9fxtT5vzKbYfG8ETkb2jlqnDWs0Sf9bBVYZmgO2oiEZEVeN10C6WqpwU9ImPMMYlPyuDz37fwW9xi+ul4xkbNgZgYIro9Bmc/DvYeiAmRop5ICh2Q0RhTeixNSOW9aetZs24tj0V/x4DImUhkFBGd/wLnPAnVG4Y7RFPOHTWRHD7arzGmdFBVZq7bw0ezN7Jx0yaeqPwDw6pMJ1IUOeMuOO8ZqNkk3GGaCsLf+UjS+aOKKwaIpoy+kGhMWZaXr8xcl8T70zeQkJjAU1Un0qfqz0RqDtLpVjjvWajTMtxhmgrG38b2GgXLbgyt3kC3UAVljPmz/Hxl8qpdvPXzOvbs2c1T1X7mtmoTico7iJx2M5z/HNh8ICZM/H4hsYCbcvc7ERkA9C9qf2PMsVNV5mzYy1tT17M+YRdP1ZzBXTW+IzonHU6+Fi54ARqcGO4wTQXnb9XWdT6rEXjDpBwKSUTGGFSVufH7+PeUtaxO3Md9Vecwpta3VMnaC+0uh4tehONODXeYxgD+P5Fc5bOcizdqb++gR2NMBZeXr/yyNokhszayeOs+7qy2kC/rfk2NzERodDZcMhJaWK2yKV38bSO5O9SBGFORFbSB/HvKOjbvzeC66qsYVn88dTPWQ41T4dp34YRLQCTcoRrzP/yt2joeeA+vgV2B34EnVXVTCGMzpkJYsi2F1yauZcGWZK6pu43xTcZRL3kxRLeG6z+Bk6+DCBvazpRe/lZtjQI+wJvRELwh5UcDXUMRlDHlXX6+MnvDHobP3cKcDXvoWmUHvzafQLM9syHiOLjibTjjToiMDneoxhTJ30RSVVW/8Fn/UkSeDUVAxpR3v23cy6s/rmH1zv2cXj2FSc0mcOKen5H0mnDJQOjyAMRUDXeYxvitqLG2CqZImyQi/fHmRlfgZmBiiGMzplzZsvcA/5qylokrdnFq7SymnTiZNglfIynR3lAm3R+zARVNmVTUE8kivMRR0ML3gM82xSa2MqZI63en8+nczYyLS6RWZBaj2v7GWbtGIglZcEZf72XCGseFO0xjjllRY221LqlAjClv1u7az3vTNjBp5S6qRClvHb+Uq1JGEJmQBCdd7VVj2dvophzw+812ETkF6ABULihT1c9DEZQxZdnqHfv558Q1/Bq/l2oxEbzTcQdX7/mIyMQN0Lwb9BkJzbuEO0xjgsbf7r8DgAvwEslEoCfwK3DURCIizd0+jfCqwoaq6nuu7WUs0Arv5cabVDXFjeP1HtALyATuUtXF7lx9gRfdqV9V1RGu/EzgM6CKi+1xN4yLMSUqPimDf09Zy5RVu6lVJZo3z8qm954hRK+bB/VOgJtHQvsr7F0QU+74+0RyA9ARWKKqd4tII+BLP47LBZ5W1cUiUgNYJCJTgbuA6ar6umvE7w88j5eg2rpPV2Aw0NUlngF4Q7OoO88EVU1x+9wPzMdLJJcDk/y8L2MCdiArl3enrWf43C1UjorgpbOrcPuBT6m0ZAJUa2BdeU25528iOaiq+SKSKyI1gSS8eduPSlV3AjvdcrqIrAGa4g2vcoHbbQQwEy+R9AY+d08U80Sktog0dvtOVdVkAJeMLheRmUBNVZ3nyj8HrsESiSkBOXn5jF6wjfenx7M3I4u7O9XguaoTqLL0My9pnP88nP0oVKpR5LmMKcv8TSRxIlIbGIbXkysD7+12v4lIK+B0vCeHRi7JAOzCq/oCL8kk+ByW6MqOVp5YSPnh1+4H9ANo0aJFccI2plDrdqXz3NfLWZaQytktqvJ9xyU0XfkRZGd4Tx8XvGA9sUyF4e9YWw+5xSEiMhnvKWC5vxcRkerA18ATqrpffOqIVVVFJKRtGqo6FBgKEBsba+0n5phlZufyyo+rGbswgWqVIvmq+3ZiN7yHLEqEE3vBxQOgYftwh2lMifK3sf1a4BdVTVPVLa7K6RpV/c6PY6PxkshIVf3GFe8WkcaqutNVXSW58u38ucqsmSvbzh9VYQXlM115s0L2NybofovfywvfriAhOZO/npbBXfuHErUoDo47Da4dAq3PDXeIxoSFvyPBDVDVtIIVVU3Fa/w+KtcL6xNgjaq+7bNpAtDXLfcFvvcpv1M83YA0VwU2BbhMROqISB3gMmCK27ZfRLq5a93pcy5jgmJ76kEeG72EWz+eT/28JOa1G8196/oRlZ4AvT+AfjMtiZgKzd82ksISjj/HdgfuAFaIyFJX9lfgdWCciNwLbAVuctsm4nX9jcfr/ns3gKomi8grwEK3398LGt6Bh/ij++8krKHdBEn6oRw+m7uFD2bGU1kPMbrNLLrtHo1sx5sbvfsTUKl6uMM0JuzEn1cuRGQ4kIo3AjDAw0BdVb0rdKGFRmxsrMbFxYU7DFOK5ecrIxds492p60k5cIgBzZZye+bnRGYmwSk3eG+k1y6y06Ix5YqILFLV2MK2+ftE8ijwEt5LhABT8ZKJMeXKqh1p/N/3q1i0NYW7miTwXK0RVN27Gpp1hltGQfPO4Q7RmFLH315bB4D+7qVCVdWM0IZlTMlKP5TDsDmbGfTLBtpX2suvLb+j2e5foGYzb3KpU663N9KNOQJ/e22dijfUSV23vhfoq6orQxibMSGnqnwVl8g/J60hOzOdj5pM45K08UhyNFz0Ipz1CERXCXeYxpRq/lZtfQQ8paozAETkArz3Ms4OUVzGhNzyxFTemLyWufF7eaLhMh6qPIKY5N3Q8RavHcReKDTGL/4mkmoFSQRAVWeKSLUQxWRMSKVl5vDBzHg+nrOJblUSmddoFMelLYXGneCWL21kXmOKyd9EsklEXgIKptu9HdgUmpCMCY1DOXkMn7uZYbM3oZnJjGoyka4pPyA5deHq/0Cn2yHC31erjDEF/E0k9wAvAwVvps92ZcaUeqrKnA17+b/vV5KwL53/a7yA26I/Jyolw5sf/YL+UKV2uMM0pszyN5G0VtXHQhqJMSGwcnsar01aw9z4fVxZaxM/Nh5J9ZQ10Po86PkvaHhSuEM0pszzN5G8JSLHAeOBsdZby5R2iSmZvDF5HT8s28GJVfYzrcW3nJA0BSo3h5s+96a6te68xgSFv++RXOgSyU3AR25OkrGq+mpIozOmmPLylVHzt/LapLVEaA6fnTif83d8giTnw/n9ofvjEFM13GEaU674PWe7qu4C3heRGcBzwP8BlkhMqbFxTwZPj1vG0oRU7mm+i/75Q4nZuhba9YSer0OdVuEO0Zhyyd8XEk8CbgauB/bhDZXydAjjMsZvGVm5fDgjnk9+3cxxURnMOOFHWid+B7WaQ5/R0L5XuEM0plzz94lkODAG6KGqO0IYjzHFMmv9Hv76zQp2ph3gH80XcXPap0TsOADnPAXnPQMx9rqTMaHmbxvJWaEOxJjiSEjOZOCEVUxfm8RldXYxpekIqictg1bnwhVvQYMTwx2iMRWG320kxpQGuXn5vP9LPB/N2kgNyWTC8ZM5ded4JKI+XPcxnHqD9cYypoRZIjFlxqKtyTw9bhlb9h1gYOu13JE2hMid+6Dz/XDhX+2lQmPC5KiJRES+UNU7RORxVX2vpIIyxte+jCzemLyWcXGJdKyeyqJWo6m3cw40OQNu/wqanB7uEI2p0Ip6IjlTRJoA94jI58Cf6gx8prs1JuiycvMYNnsTQ2ZtIjsnm0/azueiXZ8gyRHeW+md74OIyHCHaUyFV1QiGQJMB44HFvHnRKKu3Jig27Qng2fHL2fR1hTuPT6NZ7M/oHLCSu+dkCvehFrNwh2iMcY5aiJR1ffxXkIcrKoPllBMpgLLz1fem76BIbM2Uisqm6kdptN28xdQrYENbWJMKeVv998HRaQjcK4rmq2qy0MXlqmIEpIz+dt3K5m9fg/PttnGA+mDiNqUCGfe7U00ZY3pxpRKfk2+ICKPASOBhu4zUkQe9eO44SKSJCIrfcoGish2EVnqPr18tr0gIvEisk5EeviUX+7K4kWkv095axGZ78rHikiMf7dtSpP8fOXtqeu55O1ZbNq8iV9afcHD2/sTFVMN7p4MV71rScSYUkxUteidRJYDZ6nqAbdeDfhdVU8r4rjzgAzgc1U9xZUNBDJU9c3D9u0AjAa6AE2AaUA7t3k9cCmQCCwEblHV1SIyDvhGVceIyBBgmaoOPlpMsbGxGhcXV+Q9m5KxYXc6L3yzgrityQxosYK+aUOIyM2Ec5+Bc56AqErhDtEYA4jIIlWNLWybv++RCJDns57HYT24CqOqs0WklZ/X6A2MUdUsYLOIxOMlFYB4Vd0EICJjgN4isga4CLjV7TMCGAgcNZGY0iEvX/l07mb+NWUdLaNS+b3FaBonzYLmXb3ZCu3NdGPKDH8TyafAfBH51q1fA3wSwHUfEZE7gTjgaVVNAZoC83z2SXRlAAmHlXcF6gGpqppbyP5/IiL9gH4ALVq0CCBsEwxrdu6n/zcrWJaQwoCmi+mbPpSI5Fzo8Rp0fcC69BpTxvjb2P62iMwEznFFd6vqkmO85mDgFbzuw68AbxHiaXtVdSgwFLyqrVBeyxxZTl4+b/68jo/nbKZdpRTmN/+SRnt+g5bdvaeQem3CHaIx5hgUZz6SxcDiQC+oqrsLlkVkGPCjW90ONPfZtZkr4wjl+4DaIhLlnkp89zelzJa9B3ju6+Us3LyXt45fwrV7hiCpQPwxtpUAABkjSURBVK83IfZeiPCr34cxphQq8bG2RKSxqu50q9cCBT26JgCjRORtvMb2tsACvLaYtiLSGi9R9AFuVVV1k2zdgDfEfV/g+5K7E+OPQzl5fDgjnsGzNnJC1F7mN/2ChjsWQOvz4er3bbIpY8qBkCYSERkNXADUF5FEYABwgYh0wqva2gI8AKCqq1wvrNVALvCwqua58zwCTAEigeGquspd4nlgjIi8CiwhsHYbE2Qrt6fxwBeL2JF6gH+3XMD1+4Yh6VFw5btw5l32YqEx5YRf3X8BRKQl0FZVp4lIFSBKVdNDGl0IWPff0FNVPvl1M/+avI6Tqu3n0zqfUXf3b9DmYrjqPajdvOiTGGNKlYC7/4rI/Xi9nuoCbfDaI4YAFwcrSFM+7D+Uw/PjlzNp5U7+2mwF96V/6PXIuvId7w11ewoxptzxt2rrYbx3OuYDqOoGEWkYsqhMmbQ8MZVHRi0hI2UX05uPp82ead57IdcMth5ZxpRj/iaSLFXNFvevSRGJwmvjMIbcvHw+nLmRd6et5+oqy/l3rY+J3pfmjY919mP2Xogx5Zy/iWSWiPwVqCIilwIPAT+ELixTVsQnpdP/6xWs2bqDLxp+Q/f9P0GtU+Da7+G4U8IdnjGmBPibSPoD9wIr8HpZTVTVYSGLypR62bn5DJ65kUEzNnB29AYW1B1K1fQdcM6TcMELNkaWMRWIv4nkNrxxsP6bPETkSlX98SjHmHJq3qZ9vPLjatbuSOE/TabSM+VLpFIL6DMJWnQLd3jGmBLm7+vE/wHmiMhJPmV/D0E8phTLys3jtUlr6DN0HjHpCcQ1eYteyZ8jp/WBv/xqScSYCsrfJ5LNeFVb40VkoKp+hR+j/5ryY0fqQf7y5SKWJ6bxetu13Jz0DnJA4PpP4NQbwh2eMSaM/E0kqqqLReR8YLSIdMV7y9yUc6rK1NW76f/NCqJy0pnb7juabvve69Z73TCo0zLcIRpjwszfRLITQFX3upkL3wCsS045l5Wbx9++Xcn4RYn0qrudd6sNIiYhAc7vD+c9C5ElPlSbMaYU8ncY+St8lvOBZ93HlFO70g7x4MhFLN2WzBftfuOcxKFIjcZw10RoeVa4wzPGlCJHTSQi8q6qPiEiP1DIC4iqenXIIjNhoap8FZfIPyetoUpOCgtajKDBtrlw8rXeYIs2d7ox5jBFPZF84X6+edS9TLmw/1AOr/ywmq8WJXJH4+0MyH6TqH2pNlqvMeaojppIVHWR+zkLQESi8dpGtqtqUujDMyVlw+507v88jq37MvjshLmcv30oUqcl3D4eGp8W7vCMMaXYUd8jEZEhInKyW64FLAM+B5aIyC0lEJ8pAbPX7+GGIb8TeSiZJW2GcUHiYKTD1dBvliURY0yRinoh8VyfSaTuBtar6qnAmcBzIY3MhFxevvL6pLXcOXwBF1TZyJQqf6P2rt/hirfghk+hcs1wh2iMKQOKaiPJ9lm+FPgKQFV3idWXl2lpB3N4YswSZqxL4oPWv9Fr1xCkdnO4dSo06RTu8IwxZUhRiSRVRK7Emyu9O97b7QXDyFcJcWwmRBJTMuk7fAF7k5OZ2Wo0rXb+DCddBb0/gMq1wh2eMaaMKSqRPAC8DxwHPKGqu1z5xcBPoQzMhMbMdUk8PW4Zx+Vt57f6g6i2eyNc8jJ0f9x6ZRljjklRvbbWA5cXUj4FmBKqoEzwqSqf/baFV35czR111/B/Oe8SmR0Nt38DbS4Md3jGmDLM39F/j4mIDBeRJBFZ6VNWV0SmisgG97OOKxcReV9E4kVkuYic4XNMX7f/BhHp61N+poiscMe8L9ZwU6icvHyeHreMv/+wkncaTuLlA68QWe946DfTkogxJmAhTSTAZ/zvE01/YLqqtgWmu3WAnkBb9+kHDAYv8QADgK5488YPKEg+bp/7fY77n6enii7tYA73jYhj2pJ1TG88mN5pX0DHW+GeKTbgojEmKEKaSFR1NpB8WHFvYIRbHgFc41P+uXrmAbVFpDHQA5iqqsmqmgJMBS5322qq6jxVVbz3W67B/FdCciY3DP6N7fHL+bXuPzg+bT70ehOu+RCira+EMSY4ip1IRCTQWREbqepOt7wLaOSWmwIJPvslurKjlScWUm6ARVuT6fX+HI7fP5/J1QZSkwPQ90focr81qhtjgupYnkiC9sfaPUn8z2CQwSYi/UQkTkTi9uzZE+rLhd3I+VvpM/R37on+mSHyOlF1WsD9v9iovcaYkDiWRLIkwGvudtVSuJ8FY3ZtB5r77NfMlR2tvFkh5f9DVYeqaqyqxjZo0CDA8EsvVeXNKesY8O1SPqozkidzPkba9YB7rT3EGBM6xU4kqnpPgNecABT0vOoLfO9TfqfrvdUNSHNVYFOAy0SkjmtkvwyY4rbtF5FurrfWnT7nqnBUldcnr+XLGUuYXO8dLsr4Cc55Em4eCZVqhDs8Y0w5FtIp7kRkNHABUF9EEvF6X70OjBORe4GtwE1u94lALyAeyMQb2wtVTRaRV4CFbr+/q2pBA/5DeD3DqgCT3KfCUVXembqen2fPZVrNd6h3KAmu/Qg69gl3aMaYCkC8ZoqKIzY2VuPi4sIdRtDk53tPInFzJvNF1XeoWika6TMKWnQNd2jGmHJERBapamxh22zS7TIsL1958buVpMSNZ2zlD4mq1Ry5fTzUPT7coRljKpCiptq908/zLFXV5UGIx/gpNy+f/t+soObSYfwz5kto2hm5ZQxUqxfu0IwxFUxRTySt/TzPlgDjMMVwICuXJ0Yvolv8O9wbPQnaXwnXf2wvGRpjwqKoRPJPVc0pkUiMX5IPZHPXsDk8sO91rohaAF0fhB7/gIjIcIdmjKmgikok20VkAjAa+EUrWst8KZOYkskDH8/kxfRXOStyFfT4J5z1cLjDMsZUcEW9R3ISXrfbF4EEEXnPveNhStjyxFT6DprC6xkv0i1yLVw71JKIMaZUOGoiUdV9qvqRql6IN/LuJuAdEdkoIv8okQgNi7el8OTHk/g4/yVOjkpE+oyEjjeHOyxjjAGK8Wa7qu4APsEbuj0duC9UQZk/rEhM4+XPfuBLeYmW0SlE3P41nNgz3GEZY8x/FZlIRKSyiNwoIt/gvXV+Ed4cIk1CHVxFF5+UwcBhYxme/xINK+URcdeP0PrccIdljDF/UtR7JKOAS4BZwEjgVlU9VBKBVXRb9h7g1Y/H8om8TPVqNYi8awI0aBfusIwx5n8U1WtrMvCAqqaXRDDGs3XfAV4aMpJBuQOpUq0WUff+ZG+rG2NKraISyUygYNTdo0lV1f3BCali25F6kFeHjuKD3IFUqV6b6Ht+grr+vhdqjDElr6hEMqKI7eBNTPUZ3lS3JgD7MrJ4efDnvHVoIJVq1CX63ok2j4gxptQ7aiJx3X5NCTiQlcvfP/qSNw8NIKZGfSrdNxFqtwh3WMYYUyQb/bcUyMjK5dXh43l5/0tEVa9HpfsnQ61mRR9ojDGlwLFMtWuCKC9fee3LH3l613NUqlyNKvf+aEnEGFOm2BNJmA36dgYPbnuaGjERVL73B2tYN8aUOZZIwmji70u5atmD1I86SOW7J0LD9uEOyRhjis0SSZgsWLeVlpP60iQyhag7v4MmncIdkjHGHBNrIwmDTbtSyBl9B+0jtpF3/QiiWp0d7pCMMeaYWSIpYbm5eWz89D66s4y0i/9NtVNsAEZjTNkWtkQiIltEZIWILBWROFdWV0SmisgG97OOKxcReV9E4kVkuYic4XOevm7/DSLSN1z3469ZHz/DpVnT2HDSQ9Q91wZQNsaUfeF+IrlQVTupaqxb7w9MV9W2wHS3DtATaOs+/fCGskdE6gIDgK5486UM8GM4l7CZNe49Lt41nGX1rqDtTf8MdzjGGBMU4U4kh+vNH8OyjACu8Sn/XD3zgNoi0hjoAUxV1WRVTQGmApeXdND+WDJ3Ct1W/Z01lU/nlL98BiLhDskYY4IinIlEgZ9FZJGI9HNljVR1p1veBTRyy02BBJ9jE13Zkcr/RET6iUiciMTt2bMnmPfgl12Jm2j2cz/2RdSn1V++IjI6psRjMMaYUAln999zVHW7iDQEporIWt+NqqoiosG4kKoOBYYCxMbGBuWc/srPyiTjs5tozCHSbvmWKrUblOTljTEm5ML2RKKq293PJOBbvDaO3a7KCvczye2+HWjuc3gzV3ak8tJBlc2f3ccJuRtY2vlfNGl3RtHHGGNMGROWRCIi1USkRsEycBmwEpgAFPS86gt875YnAHe63lvdgDRXBTYFuExECuZMucyVlQp7Zw2lzc6f+LrmHZx9xZ3hDscYY0IiXFVbjYBvxWtwjgJGqepkEVkIjBORe4GtwE1u/4lAL7w54zOBuwFUNVlEXgEWuv3+rqrJJXcbR5a/Yzk1Z/6NuXoaZ939BmKN68aYciosiURVNwEdCynfB1xcSLkCDx/hXMOB4cGOMSBZ6aR9cRvZWo2dF79H9zrVwh2RMcaETGnr/lsu7P/6MWpmJjC6+QCuP/f0cIdjjDEhZYkkyHTlN9Rc/w1DI27kjj63WZWWMabcs9F/gyl9N9nfP8ma/OOp2+sF6lWvFO6IjDEm5OyJJFhUOfTto2jOAYbVe54bu9gEVcaYisESSbAsH0vlTVN4J78PT996JRERVqVljKkYrGorGDKTyZ74Aivy21Lnosc5vkH1cEdkjDElxp5IgiB36stEZqUypPrD3Htum3CHY4wxJcoSSaAS44hcMoLPcntw13VXER1p/0mNMRWL/dULhCrZPz5LktZmdbuH6X5C/XBHZIwxJc4SSSDW/EDMrsW8m3cjT10dW/T+xhhTDllj+7HKyyX754Fs1aZU6Xw7TWtXCXdExhgTFvZEcqyWjSYmdSP/4RYevKh9uKMxxpiwsSeSY5GfT9ast9mQ34pW3W+kQQ17g90YU3HZE8mxWPcTldI2MZze3HPu8eGOxhhjwsqeSI5B9ux32aUNqdbpWmpXtfnXjTEVmz2RFNfu1cTsjGNE3mXcf/6J4Y7GGGPCzp5Iiilv0efkE8We1tfSol7VcIdjjDFhZ4mkOPJyyV06hml5Z3DdOaeFOxpjjCkVrGqrOBLmUyk7hd+rnM95bRuEOxpjjCkV7ImkGDJX/kiURlL31MttmHhjjHEskRRD5rpfWJ9/Ild1sUZ2Y4wpUC6qtkTkchFZJyLxItI/JBfJOUjt9A1sq3oybRvVCMkljDGmLCrziUREIoEPgJ5AB+AWEekQ7Osc3LmGKPKIaX56sE9tjDFlWplPJEAXIF5VN6lqNjAG6B3si2zZtAGAFq3bBfvUxhhTppWHRNIUSPBZT3Rl/yUi/UQkTkTi9uzZc0wX0WoNmFu9Bye0DfrDjjHGlGkVorFdVYcCQwFiY2P1WM7RofNF0PmioMZljDHlQXl4ItkONPdZb+bKjDHGlIDykEgWAm1FpLWIxAB9gAlhjskYYyqMMl+1paq5IvIIMAWIBIar6qowh2WMMRVGmU8kAKo6EZgY7jiMMaYiKg9VW8YYY8LIEokxxpiAWCIxxhgTEEskxhhjAiKqx/R+XpklInuArcd4eH1gbxDDCafyci92H6WL3UfpE6x7aamqhU7EVOESSSBEJE5VY8MdRzCUl3ux+yhd7D5Kn5K4F6vaMsYYExBLJMYYYwJiiaR4hoY7gCAqL/di91G62H2UPiG/F2sjMcYYExB7IjHGGBMQSyTGGGMCYonETyJyuYisE5F4Eekf7ngKiMgWEVkhIktFJM6V1RWRqSKywf2s48pFRN5397BcRM7wOU9ft/8GEenrU36mO3+8O1aCFPdwEUkSkZU+ZSGP+0jXCPJ9DBSR7e47WSoivXy2veBiWiciPXzKC/39ctMjzHflY91UCYhIJbce77a3CvA+movIDBFZLSKrRORxV16mvpOj3EdZ/E4qi8gCEVnm7uXlY71+sO7xiFTVPkV88Ian3wgcD8QAy4AO4Y7LxbYFqH9Y2b+A/m65P/CGW+4FTAIE6AbMd+V1gU3uZx23XMdtW+D2FXdszyDFfR5wBrCyJOM+0jWCfB8DgWcK2beD+92pBLR2v1ORR/v9AsYBfdzyEOBBt/wQMMQt9wHGBngfjYEz3HINYL2Lt0x9J0e5j7L4nQhQ3S1HA/Pdf79iXT+Y93jEWIPxR6G8f4CzgCk+6y8AL4Q7LhfLFv43kawDGrvlxsA6t/wRcMvh+wG3AB/5lH/kyhoDa33K/7RfEGJvxZ//AIc87iNdI8j3MZDC/2j96fcGbw6ds470++X+kOwFog7/PSw41i1Huf0kiN/N98ClZfU7KeQ+yvR3AlQFFgNdi3v9YN7jkT5WteWfpkCCz3qiKysNFPhZRBaJSD9X1khVd7rlXUAjt3yk+zhaeWIh5aFSEnEf6RrB9oir8hnuU1VT3PuoB6Sqau5h5X86l9ue5vYPmKsSOR3vX8Bl9js57D6gDH4nIhIpIkuBJGAq3hNEca8fzHsslCWSsu8cVT0D6Ak8LCLn+W5U758UZa6Pd0nEHcJrDAbaAJ2AncBbIbhGSIhIdeBr4AlV3e+7rSx9J4XcR5n8TlQ1T1U7Ac2ALkD7MIdUKEsk/tkONPdZb+bKwk5Vt7ufScC3eL9su0WkMYD7meR2P9J9HK28WSHloVIScR/pGkGjqrvdH4B8YBjed3Is97EPqC0iUYeV/+lcbnstt/8xE5FovD++I1X1G1dc5r6Twu6jrH4nBVQ1FZiBV81U3OsH8x4LZYnEPwuBtq4nQwxeQ9aEMMeEiFQTkRoFy8BlwEq82Ap6y/TFqyfGld/petx0A9JclcIU4DIRqeMe+S/DqxPdCewXkW6uh82dPucKhZKI+0jXCJqCP4rOtXjfScG1+7jeNa2BtngN0IX+frl/nc8AbigkXt/7uAH4xe1/rDEL8AmwRlXf9tlUpr6TI91HGf1OGohIbbdcBa+tZ80xXD+Y91i4YDVqlfcPXi+V9Xh1lH8LdzwupuPxelosA1YVxIVXxzkd2ABMA+q6cgE+cPewAoj1Odc9QLz73O1THov3P91GYBDBazwcjVfFkINXB3tvScR9pGsE+T6+cHEud/8TN/bZ/28upnX49IA70u+X+44XuPv7Cqjkyiu79Xi3/fgA7+McvCql5cBS9+lV1r6To9xHWfxOTgOWuJhXAv93rNcP1j0e6WNDpBhjjAmIVW0ZY4wJiCUSY4wxAbFEYowxJiCWSIwxxgTEEokxxpiAWCIxZZaIqIi85bP+jIgMLOY5PhNvVNhKbr2+iGwJUnwXiMiPwThXEddp4EZqXSIi5x627QkRqerHOTJCF6Ep7yyRmLIsC7hOROoHeJ48vHcfShURifRz14uBFap6uqrOOWzbE3gD/hkTMpZITFmWizcf9ZMBnudd4EmfISGA/32iEJFBInKXW94iIq+JmwdGRM4QkSkislFE/uJzmpoi8pN4cz4MEZEId/xlIvK7iCwWka/c2FAF531DRBYDNx4WTysR+UW8gQeni0gLEemENwx7bxdLFZ/9HwOaADNEZIYru0W8OUFWisgbh/+HcE9kv4vIFW79WRFZ6K75sk8ca0RkmHjzZPxccF0ReUy8uUCWi8iYY/s6TFljicSUdR8At4lIrQDOsQ34FbijuMepN6DeHOAzvCElugEv++zTBXgUb06INvzxBPUicIl6A27GAU/5HLNPVc9Q1cP/EP8HGKGqpwEjgfdVdSnwf3hzT3RS1YMFO6vq+8AO4EJVvVBEmgBvABfhDV7YWUSuKdhfRBoBP+G9Qf2TiFyGN5xGF7f/mfLHoKBtgQ9U9WQgFbjelfcHTncx+iZUU45FFb2LMaWXqu4Xkc+Bx4CDRe1/FK/hjSf0UzGOKRhvbQXeBETpQLqIZBWMkQQsUNVNACIyGm8Ij0N4iWWuNzQUMcDvPucde4TrnQVc55a/wHsSKY7OwExV3ePiGYk3Mdd3eBMnTQceVtVZbv/L3GeJW6+Ol0C2AZtdEgNYhDcnC3jDeYwUke/ceU0FYInElAfv4k3682lhG0VkCt4cF3Gqel9h+6jqBvHmfbjJpziXPz+1Vz7ssCz3M99nuWC94P+tw8cgUrxxqqaq6i2F3g0cOEJ5KOXiJYQeQEEiEeA1Vf3Id0fx5vnwvd88oKBK7Qq85HQV8DcROVX/mNfClFNWtWXKPFVNxpsa9N4jbO/hqn0KTSI+/gE847O+FejgRk2tjdeoXVxd3OiqEcDNeFVo84DuInIC/HcU53Z+nOs3vBFaAW7Dq1IrSjrelLPgDcJ3vmsHicSbpbAgaSheh4P2IvK8K5sC3OPTftNURBoe6ULuHpur6gzgebxhzKv7EaMp4+yJxJQXbwGPBHICVV3lGrnPcOsJIjIOb+TVzfxRxVMcC/FGuj0Bb2jub1U13zXajy7odozXZrK+iHM9CnwqIs8Ce4C7/bj+UGCyiOxw7ST9XRwC/KSq/x0eXFXzROQWYIKIpKvqhyJyEvC7q4LLAG7HewIpTCTwpWuvErw2nFQ/YjRlnI3+a4wxJiBWtWWMMSYglkiMMcYExBKJMcaYgFgiMcYYExBLJMYYYwJiicQYY0xALJEYY4wJyP8D0Ts/I8W7IIQAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plt.plot(N, V)\n",
"\n",
"k = 10.34\n",
"beta = 0.64\n",
"plt.plot(N, k * (N**beta))\n",
"plt.xlabel('N - Number of tokens')\n",
"plt.ylabel('|V| - Size of vocabulary')\n",
"plt.title('Heaps\\' Law')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
h>0</th>\n",
" <td>@BubblyDentist @MeetUunngLee nahi nahi, mere s...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>going to the grammys first entertainment law b...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>bohot hi badiya ji aap sunao?</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Parvez Musharraf is Digvijay Singh of Pakistan...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>guddu ko bass john cena k sticker ki padii hai...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Aman ki maa ki... Asha https://twitter.com/ash...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>pakistan can wait more more and more . . . ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>@sagarcasm Jai Mahesh !!</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>RT AAP_Najafgarh \" RT AAPInNews : When DDCA lo...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>\"Kaam ho jayega, thoda kharcha paani lagega\" \\...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Sentence\n",
"0 @BubblyDentist @MeetUunngLee nahi nahi, mere s...\n",
"1 going to the grammys first entertainment law b...\n",
"2 bohot hi badiya ji aap sunao?\n",
"3 Parvez Musharraf is Digvijay Singh of Pakistan...\n",
"4 guddu ko bass john cena k sticker ki padii hai...\n",
"5 Aman ki maa ki... Asha https://twitter.com/ash...\n",
"6 pakistan can wait more more and more . . . ...\n",
"7 @sagarcasm Jai Mahesh !!\n",
"8 RT AAP_Najafgarh \" RT AAPInNews : When DDCA lo...\n",
"9 \"Kaam ho jayega, thoda kharcha paani lagega\" \\..."
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[:10]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# s = re.sub('[^0-9a-zA-Z]+', '*', s)\n",
"# https://stackoverflow.com/questions/12985456/replace-all-non-alphanumeric-characters-in-a-string"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def cleanText(raw_text):\n",
" '''\n",
" Convert a raw review to a cleaned review\n",
" '''\n",
" text = BeautifulSoup(raw_text, 'html').get_text() #remove html\n",
" words = text.split()\n",
" words = [w for w in words if '@' not in w and '#' not in w] # remove the @-words and #-words\n",
" text = ' '.join(words)\n",
" letters_only = re.sub('[^a-zA-Z]+', ' ', text) # remove non-character\n",
" \n",
" return( letters_only.lower())\n",
"\n",
"vclean = np.vectorize(cleanText)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sentence</th>\n",
" <th>Cleaned sentence</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>@BubblyDentist @MeetUunngLee nahi nahi, mere s...</td>\n",
" <td>nahi nahi mere saath jaakar pachtaogi ye uunng...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>going to the grammys first entertainment law b...</td>\n",
" <td>going to the grammys first entertainment law b...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>bohot hi badiya ji aap sunao?</td>\n",
" <td>bohot hi badiya ji aap sunao</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Parvez Musharraf is Digvijay Singh of Pakistan...</td>\n",
" <td>parvez musharraf is digvijay singh of pakistan...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>guddu ko bass john cena k sticker ki padii hai...</td>\n",
" <td>guddu ko bass john cena k sticker ki padii hai...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Aman ki maa ki... Asha https://twitter.com/ash...</td>\n",
" <td>aman ki maa ki asha https twitter com ashabhos...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>pakistan can wait more more and more . . . ...</td>\n",
" <td>pakistan can wait more more and more aakhir pa...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>@sagarcasm Jai Mahesh !!</td>\n",
" <td>jai mahesh</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>RT AAP_Najafgarh \" RT AAPInNews : When DDCA lo...</td>\n",
" <td>rt aap najafgarh rt aapinnews when ddca lowere...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>\"Kaam ho jayega, thoda kharcha paani lagega\" \\...</td>\n",
" <td>kaam ho jayega thoda kharcha paani lagega sir...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Sentence \\\n",
"0 @BubblyDentist @MeetUunngLee nahi nahi, mere s... \n",
"1 going to the grammys first entertainment law b... \n",
"2 bohot hi badiya ji aap sunao? \n",
"3 Parvez Musharraf is Digvijay Singh of Pakistan... \n",
"4 guddu ko bass john cena k sticker ki padii hai... \n",
"5 Aman ki maa ki... Asha https://twitter.com/ash... \n",
"6 pakistan can wait more more and more . . . ... \n",
"7 @sagarcasm Jai Mahesh !! \n",
"8 RT AAP_Najafgarh \" RT AAPInNews : When DDCA lo... \n",
"9 \"Kaam ho jayega, thoda kharcha paani lagega\" \\... \n",
"\n",
" Cleaned sentence \n",
"0 nahi nahi mere saath jaakar pachtaogi ye uunng... \n",
"1 going to the grammys first entertainment law b... \n",
"2 bohot hi badiya ji aap sunao \n",
"3 parvez musharraf is digvijay singh of pakistan... \n",
"4 guddu ko bass john cena k sticker ki padii hai... \n",
"5 aman ki maa ki asha https twitter com ashabhos... \n",
"6 pakistan can wait more more and more aakhir pa... \n",
"7 jai mahesh \n",
"8 rt aap najafgarh rt aapinnews when ddca lowere... \n",
"9 kaam ho jayega thoda kharcha paani lagega sir... "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['Cleaned sentence'] = vclean(data['Sentence'])\n",
"data[:10]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 nahi nahi mere saath jaakar pachtaogi ye uunng...\n",
"1 going to the grammys first entertainment law b...\n",
"2 bohot hi badiya ji aap sunao \n",
"3 parvez musharraf is digvijay singh of pakistan...\n",
"4 guddu ko bass john cena k sticker ki padii hai...\n",
"5 aman ki maa ki asha https twitter com ashabhos...\n",
"6 pakistan can wait more more and more aakhir pa...\n",
"7 jai mahesh \n",
"8 rt aap najafgarh rt aapinnews when ddca lowere...\n",
"9 kaam ho jayega thoda kharcha paani lagega sir...\n",
"Name: Cleaned sentence, dtype: object"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[:10]['Cleaned sentence']"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"def tokenize(s):\n",
" return tuple(s.split())"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of tokens: 303161\n",
"Number of word types: 32707\n"
]
}
],
"source": [
"tokens = []\n",
"for i in range(len(data)):\n",
" tokens.extend( tokenize( data.iloc[i]['Cleaned sentence']) )\n",
"sorted_tokens = sorted(tokens)\n",
"word_types = list(set(tokens))\n",
"print('Number of tokens:', len(tokens))\n",
"print('Number of word types:', len(word_types))"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TTR: 0.10788656852299604\n"
]
}
],
"source": [
"print('TTR:', len(word_types)/len(tokens))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 2. Zipf's Law"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"token_count = {}\n",
"for s in sorted_tokens:\n",
" if s in token_count:\n",
" token_count[s] += 1\n",
" else:\n",
" token_count[s] = 1"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Top 50 Word types in decreasing order of number of occurences:\n",
"\n"
]
},
{
"data": {
"text/plain": [
"[('hai', 10030),\n",
" ('to', 4154),\n",
" ('ki', 3224),\n",
" ('ke', 3170),\n",
" ('nahi', 3169),\n",
" ('bhi', 2929),\n",
" ('the', 2866),\n",
" ('se', 2601),\n",
" ('ho', 2365),\n",
" ('ka', 2310),\n",
" ('bhai', 2266),\n",
" ('ko', 2208),\n",
" ('me', 1955),\n",
" ('ye', 1869),\n",
" ('kya', 1815),\n",
" ('hi', 1801),\n",
" ('aur', 1797),\n",
" ('twitter', 1760),\n",
" ('com', 1724),\n",
" ('kar', 1681),\n",
" ('i', 1509),\n",
" ('in', 1387),\n",
" ('t', 1319),\n",
" ('https', 1310),\n",
" ('is', 1296),\n",
" ('mein', 1276),\n",
" ('a', 1202),\n",
" ('ek', 1165),\n",
" ('and', 1126),\n",
" ('status', 1108),\n",
" ('of', 1074),\n",
" ('on', 1071),\n",
" ('na', 1026),\n",
" ('s', 1009),\n",
" ('ab', 969),\n",
" ('toh', 963),\n",
" ('rt', 944),\n",
" ('tha', 937),\n",
" ('http', 905),\n",
" ('for', 885),\n",
" ('you', 885),\n",
" ('aaj', 873),\n",
" ('co', 872),\n",
" ('raha', 868),\n",
" ('par', 826),\n",
" ('ne', 824),\n",
" ('aap', 820),\n",
" ('hain', 816),\n",
" ('koi', 802),\n",
" ('kuch', 801)]"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sorted_token_count = list(token_count.items())\n",
"sorted_token_count.sort(key=lambda x:x[1], reverse=True)\n",
"print('Top 50 Word types in decreasing order of number of occurences:\\n')\n",
"sorted_token_count[:50]"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0.5, 1.0, \"Zipf's Law\")"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZIAAAEWCAYAAABMoxE0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3deZhcdZ3v8fcnTQeazWbpyUAHSMTcMCBCsGURFxYliCJ5GFBcxoA43OfKdZ8oudd5wqKCExVQR53IrgyrERHQGAGRywyBxLAFCGRQSJpAIiQgmsEs3/vH+RWp7nR3ne7T1dWn+/N6nnqqzq/OqfM9RZFv/85vU0RgZmY2UGMaHYCZmZWbE4mZmRXiRGJmZoU4kZiZWSFOJGZmVogTiZmZFeJEYjYAkn4haXrOfcdJ+q2kP0n6Zr1jMxtqWzU6ALPhRtJHgH/r4a3tgFkRcW5EvKcfH3kG8Edgx4gISWcDRMTZ/YwrgEkRsaw/x5nVm2skZt1ExNURsX31A/gs8DzwwwF85F7Ao+HRvzZCOZGY1SBpCnARcEpErExlv5H0ifT6VEn3SPqupJckPS7p6PTeFcB04IuSXpH0rm6fvaukWyStlfSipLsl9ev/S0l7S7pD0guS/ijpakmt6b3TJP28at8nJd1Qtb1c0oED+mLMEt/aMutD+gf5RuC8iPhNH7sekvbbFTgRmCtpYkScKglgRUR8Oe3766rjvgCsANrS9qFAf2suAs4HfgvsCPwEOJusFnUXcGFKTn8LjAUOS9f2emB74KF+ns+sC9dIzHqhLANcBTwC/EuN3VcBF0XE+oi4DlgKvDfHadYDuwF7pWPv7u8tsIhYFhHzI+LViFgNfAt4Z3rvKeBPwIHAO4B5wLOS9kn73B0Rm/pzPrPuXCMx692XgP2AN+f4x72z2z5PA7vnOMdsstrDr1LNZU5EXNCfICWNAy4G3g7sQPYH4pqqXe4CjgDekF6vJUsih6Vts0JcIzHrgaQjgP8LnBQRa3Mc0p5qMBV7As/WOigi/hQRX4iI1wPvBz5faV/ph6+R3Q7bPyJ2BD5KdruropJI3p5e30WWSN6JE4kNAicSs24k7QZcC3w2IhbnPOxvgE9LapZ0MvB3wG05zvU+SW9ISeglYCPQ162msZK2qXo0kdVCXgFektQOzOh2zF3AkUBLRKwA7gaOBXYB8l6fWa+cSMy29I/AOODi1NOq+vGDXo5ZAEwiGy/yVbKazAs5zjWJrPH9FeA/ge9FxJ197L8EWFf1OA04BziILBHdCsytPiAinkiff3fafhl4CrgnIjbmiNGsT3LXdrNiJJ0KfCIi3tboWMwawTUSMzMrxInEzMwK8a0tMzMrxDUSMzMrZNQNSNx1111jwoQJjQ7DzKxUFi1a9MeIaOvpvVGXSCZMmMDChQsbHYaZWalIerq393xry8zMCnEiMTOzQpxIzMysECcSMzMrxInEzMwKGXW9turppsWdzJ63lGfXrmP31hZmTJ3MtCntjQ7LzKyu6lojkXSZpFWSHqkq21nS/LR29HxJO6VySfq2pGWSHpJ0UNUx09P+T0qaXlX+ZkkPp2O+3W09iCF10+JOZs59mM616wigc+06Zs59mJsWdzYqJDOzIVHvW1tXkK17UO0s4PaImATcnrYB3kM2pfYk4Azg+5AlHmAW2ZrYBwOzKskn7fOPVcd1P9eQmT1vKevWd52Re936jcyet7RBEZmZDY26JpKI+C3wYrfiE4Ar0+srgWlV5VdF5l6gNS0wNBWYHxEvRsQaYD5wbHpvx4i4Ny1xelXVZw25Z9eu61e5mdlI0YjG9nERsTK9fo5sASGAdmB51X4rUllf5St6KN+CpDMkLZS0cPXq1cWvoAe7t7b0q9zMbKRoaK+tVJOo+/TDETEnIjoioqOtrcepYgqbMXUyLc1NXcpampuYMXVyXc5nZjZcNCKRPJ9uS1XWxl6VyjuBPar2G5/K+iof30N5Q0yb0s75J+5Pe2sLAtpbWzj/xP3da8vMRrxGdP+9GZgOXJCef1ZV/r8lXUvWsP5SRKyUNA/4WlUD+zHAzIh4UdLLkg4lWy/7Y8B3hvJCups2pd2Jw8xGnbomEknXAEcAu0paQdb76gLgekmnA08DH0i73wYcBywD/gKcBpASxnnA/Wm/cyOi0oD/SbKeYS3AL9LDzMyG0KhbIbGjoyM8jbyZWf9IWhQRHT295ylSzMysECcSMzMrxInEzMwKcSIxM7NCnEjMzKwQJxIzMyvEicTMzApxIjEzs0KcSMzMrBAnEjMzK8SJxMzMCnEiMTOzQpxIzMysECcSMzMrxInEzMwKcSIxM7NCnEjMzKwQJxIzMyvEicTMzApxIjEzs0KcSMzMrBAnEjMzK8SJxMzMCnEiMTOzQpxIzMysECcSMzMrxInEzMwKcSIxM7NCnEjMzKwQJxIzMyukYYlE0uckLZH0iKRrJG0jaaKkBZKWSbpO0ti079Zpe1l6f0LV58xM5UslTW3U9ZiZjVYNSSSS2oFPAx0R8UagCTgF+DpwYUS8AVgDnJ4OOR1Yk8ovTPshad903H7AscD3JDUN5bWYmY12jby1tRXQImkrYFtgJXAUcGN6/0pgWnp9QtomvX+0JKXyayPi1Yj4PbAMOHiI4jczMxqUSCKiE/gG8AxZAnkJWASsjYgNabcVQHt63Q4sT8duSPvvUl3ewzGvkXSGpIWSFq5evXrwL8jMbBRr1K2tnchqExOB3YHtyG5N1UVEzImIjojoaGtrq9dpzMxGpUbd2noX8PuIWB0R64G5wOFAa7rVBTAe6EyvO4E9ANL7rwNeqC7v4RgzMxsCjUokzwCHSto2tXUcDTwK3AmclPaZDvwsvb45bZPevyMiIpWfknp1TQQmAfcN0TWYmRlZg/eQi4gFkm4EfgdsABYDc4BbgWslfSWVXZoOuRT4kaRlwItkPbWIiCWSridLQhuAMyNi45BejJnZKKfsD/vRo6OjIxYuXNjoMMzMSkXSoojo6Ok9j2w3M7NCnEjMzKwQJxIzMyvEicTMzApxIjEzs0KcSMzMrBAnEjMzK8SJxMzMCnEiMTOzQpxIzMysECcSMzMrxInEzMwKcSIxM7NCnEjMzKyQXOuRSDocOBvYKx0jICLi9fULzczMyiDvwlaXAp8DFgFeOMrMzF6TN5G8FBG/qGskZmZWSnkTyZ2SZgNzgVcrhRHxu7pEZWZmpZE3kRySnquXWQzgqMENx8zMyiZXIomII+sdiJmZlVOu7r+Sxkm6VNIv0va+kk6vb2hmZlYGeceRXAHMA3ZP208An61HQGZmVi55E8muEXE9sAkgIjbgbsBmZkb+RPJnSbuQNbAj6VDgpbpFZWZmpZG319bngZuBvSXdA7QBJ9UtKjMzK428iWQx8E5gMtn0KEvxPF1mZkb+ZHBpRGyIiCUR8QgwFritjnGZmVlJ5E0knZK+ByBpJ2A+8OO6RWVmZqWRK5FExD8Dr0j6AfAr4JsRcXldIzMzs1Los41E0olVmwuAfwbuA0LSiRExt57BmZnZ8Fersf34btuLgeZUHmSTOJqZ2SjWZyKJiNPqdWJJrcAlwBvJktLHyXqDXQdMAP4AfCAi1kgScDFwHPAX4NTKzMOSpgNfTh/7lYi4sl4xm5nZlvLOtTVe0k8lrUqPn0gaX/DcFwO/jIh9gAOAx4CzgNsjYhJwe9oGeA8wKT3OAL6f4toZmEU2O/HBwKzUGcDMzIZI3l5bl5MNSNw9PX6eygZE0uuAd5CtvEhE/DUi1gInAJUaxZXAtPT6BOCqyNwLtEraDZgKzI+IFyNiDVlvsmMHGpeZmfVf3kTSFhGXp7EkGyLiCrLR7QM1EVgNXC5psaRLJG0HjIuIlWmf54Bx6XU7sLzq+BWprLfyLiSdIWmhpIWrV68uELaZmXWXN5G8IOmjkprS46PACwXOuxVwEPD9iJgC/JnNt7EAiIggze1VVETMiYiOiOhoayuS/8zMrLu8ieTjwAfIagkryebZOrXAeVcAKyJiQdq+kSyxPJ9uWZGeV6X3O4E9qo4fn8p6KzczsyGSN5GMj4j3R0RbRPxNREwD9hzoSSPiOWC5pMmp6GjgUbJ2mOmpbDrws/T6ZuBjyhwKvJRugc0DjpG0U2pkPyaVmZnZEMk7aeN3yGoMtcr641PA1ZLGAk8Bp5EltuvT6otPk9WCIJvX6zhgGVn339MAIuJFSecB96f9zo2IFwvEZGZm/VRrZPthwFuBNkmfr3prR6CpyIkj4gGgo4e3ju5h3wDO7OVzLgMuKxKLmZkNXK0ayVhg+7TfDlXlL+P1SMzMjNoj2+8C7pJ0RUQ8PUQxmZlZieSd/ddJxMzMeuRVDs3MrJA+E4mkr6fnk4cmHDMzK5taNZLj0sy7M4ciGDMzK59avbZ+CawBtpf0MiCyaUtE1it3xzrHZ2Zmw1yfNZKImBERrcCtEbFjROxQ/TxEMZqZ2TCWa2R7RJwgaRzwllS0ICI8ja6ZmeVe2OpksrXaTyabtuQ+SR6QaGZmuefa+jLwlohYBSCpDfg12ay9ZmY2iuUdRzKmkkSSF/pxrJmZjWB5ayS/lDQPuCZtf5BsRl4zMxvl8ja2z5B0IvC2VDQnIn5av7DMzKws8tZIiIi5wNw6xmJmZiXkdg4zMyvEicTMzArJO47kzT2UvW/wwzEzs7LJWyP5oaQ3VjYkfQj45/qEZGZmZZK3sf0k4EZJHwbeDnwMOKZuUZmZWWnk7f77lKRTgJuAZ4BjImJdXSMzM7NS6DORSHqYbNr4ip2BJmCBJCLiTfUMzszMhr9aNRI3qJuZWZ/6TCQR8XTltaQmYFytY8zMbHTJlRQkfQqYBTwPbErFAfjWlpnZKJe3dvEZYHJEvFDPYMzMrHzyjiNZDrxUz0DMzKyc8tZIngJ+I+lW4NVKYUR8qy5RlcBNizuZPW8pz65dx+6tLcyYOplpU9obHZaZ2ZDLm0ieSY+x6TGq3bS4k5lzH2bd+o0AdK5dx8y5DwM4mZjZqJN3QOI59Q6kTGbPW/paEqlYt34js+ctdSIxs1En76SNbZJmS7pN0h2VR9GTS2qStFjSLWl7oqQFkpZJuk7S2FS+ddpelt6fUPUZM1P5UklTi8aUx7Nrex7U31u5mdlIlrex/WrgcWAicA7wB+D+QTj/Z4DHqra/DlwYEW8A1gCnp/LTgTWp/MK0H5L2BU4B9gOOBb6XxrvU1e6tLf0qNzMbyfImkl0i4lJgfUTcFREfB44qcmJJ44H3ApekbaXPvDHtciUwLb0+IW2T3j867X8CcG1EvBoRvweWAQcXiSuPGVMn09LcNV+1NDcxY+rkep/azGzYydvYvj49r5T0XuBZsnm3irgI+CKwQ9reBVgbERvS9gqg0uDQTtYFmYjYIOmltH87cG/VZ1Yf8xpJZwBnAOy5554Fw97coO5eW2Zm+RPJVyS9DvgC8B1gR+BzAz1pWhRrVUQsknTEQD8nr4iYA8wB6OjoiBq75zJtSrsTh5kZORJJanOYFBG3kA1KPHIQzns48H5JxwHbkCWmi4FWSVulWsl4oDPt3wnsAayQtBXwOuCFqvKK6mPMzGwI1GwjiYiNwIcG86QRMTMixkfEBLLG8jsi4iPAnWSLaAFMB36WXt+ctknv3xERkcpPSb26JgKTgPsGM1YzM+tb3ltb90j6LnAd8OdKYUT8bpDj+RJwraSvAIuBS1P5pcCPJC0DXiRLPkTEEknXA48CG4AzU+IzM7MhouwP+xo7SXf2UBwRUajnViN0dHTEwoULGx2GmVmpSFoUER09vZd3ZPtgtIuYmdkIlHcciZmZWY+cSMzMrBAnEjMzK6TfiUTSnHoEYmZm5TSQGkmPrfZmZjY65R1HUm3VoEdRQl4h0cws0+9EEhHH1iOQMrlpcSczbniQ9ZuyMTida9cx44YHgS1XSHTCMbORzo3tA3D2zUteSyIV6zcFZ9+8pEtZZUnezrXrCDYvyXvTYk8HZmYjhxPJAKxdtz5XeV9L8pqZjRQ1E0laDnfAU8aPZl6S18xGg4bM/lt2O23bnKvcS/Ka2Wgw3Gb/LYVZx+/HjBsfZP3Gze0kzU3ivW/ajcMvuOO1hvUj92njJ4s6u9ze8pK8ZjbS5E0kB6bnc6vKgoLrtpdVT0vtHrlPG9fdt7xLT67r7lvOBw/egzsfX+1eW2Y2YuWaRn4kqdc08gee86seG+FbW5p5YNYxg34+M7OhNOBp5CXtmfMcayPi5X5HNoLk7cllZjbS1Lq1dSXZLSz1sU8AVwBXDVJMZmZWIn0mEi9old9O2zaz5i9b1j566+FlZjZSeEDiIJl1/H40N3WtuDU3iVnH79egiMzMhoYTyQDdtLiTwy+4g4ln3crhF9wBwOyTDqC9tQUB7a0tzD7pAPfQMrMRbyCz/456lTm0KuNDKnNonX/i/txz1qjsEW1mo5hrJAPgObTMzDZzIhkAz6FlZraZE8kAeA4tM7PNnEgG4Mh92vpVbmY2kjmRDMAtD67MXd69d5cXtTKzkca9tgYg73QovfXugi2X5DUzKyvXSOrIvbvMbDRwIhmAMb3MPNa93L27zGw0cCIZgE29zLzfvdy9u8xsNGhIIpG0h6Q7JT0qaYmkz6TynSXNl/Rket4plUvStyUtk/SQpIOqPmt62v9JSdOHIv72XhJB9/IZUyfT0tzUpcwrJJrZSNOoGskG4AsRsS9wKHCmpH2Bs4DbI2IScHvaBngPMCk9zgC+D1niAWYBhwAHA7MqyaeeZkydTHO3+1jNY7RFgpg2pZ3zT9y/y/xb55+4vxvazWxEaUivrYhYCaxMr/8k6TGgHTgBOCLtdiXwG+BLqfyqyJZzvFdSq6Td0r7zI+JFAEnzgWOBa+p+Ed3bSXppN5k2pd2Jw8xGtIa3kUiaAEwBFgDjUpIBeA4Yl163A8urDluRynorr6vZ85ayfmPXBpH1G8O9scxsVGroOBJJ2wM/AT4bES9Lm/+sj4iQNCgLyks6g+yWGHvumXf14N519tLrqnPtOg6/4A6eXbuO3VtbmDF1smsjZjbiNaxGIqmZLIlcHRFzU/Hz6ZYV6XlVKu8E9qg6fHwq6628i4iYExEdEdHR1lZ8GpO+1h3uXLuOSM8zbnhwwCPZPSLezMqiUb22BFwKPBYR36p662ag0vNqOvCzqvKPpd5bhwIvpVtg84BjJO2UGtmPSWV1lbeatH5TcPbNS/r9+ZUR8dVJaebch51MzGxYalSN5HDgH4CjJD2QHscBFwDvlvQk8K60DXAb8BSwDPgh8EmA1Mh+HnB/epxbaXgfLnqbTqUvHhFvZmXSqF5b/4/e7xAd3cP+AZzZy2ddBlw2eNHVttO2zaz5S/8TRF4eEW9mZdLwXltlNOv4/Whu6qulpBiPiDezMnEiyaF7wzfA7JMO6DLQcDB5RLyZlYmnka+ht6ngzz9xf+4566jX9pty7q96vN2107bN/T5npcvw7HlL3ZXYzIY9J5Ia+mr4rv6Hfdbx+zHjxge7DFRsbhKzjt9vQOf1iHgzKwvf2qqhtwbuyuDDvm53zT7pACcDMxvxXCOpYffWlj5HsleeZ9zwILNPPqDL7a7e3LS407etzGzEcCKp4ch92vjxvc/U3K8y+LB7QuieNI7cp42fLOr08rtmNmL41lYNdz6+Ove+va3ZXj1C/ep7n/FgQzMbUZxIaujttlYePTXU9za9igcbmllZ+dZWDU0SGyPf7FrNY+gy+29/ktAYiYln3eo2EzMrHddIasibRADWb+o6+29vY997Kt8Y4QkazayUnEhqKDJqPdgyabQ0N/GRQ/d8rZtwk7ZMK24zMbMy8a2tGvL22upNkCWj3rr6Tjzr1h6Pc5uJmZWFayQ19KfXVk+aa3zDnqDRzMrOiaSGojWD7u0m3VdN9ASNZlZ2TiQ1DHbNoPuqidOmtHP+ift3mVrl/BP3d68tMysNt5HUMGPqZGbc8CDrN+XvvVVL94GLnqDRzMrMNZIcBjOJmJmNNK6R1DBz7kN1+dzqgYsegGhmZeZEUsO69Zvq8rnVMwd70kYzKzPf2hoGPADRzMrMiWSY8ABEMysrJ5JhwgMQzays3EZSQ3s/Z/EdiDGQawCiV1Y0s+HINZIaJuxS/5rCJmDh0y/2uU9Pi2R5lmAzGw5cI6nh3qfWDMl5fnzvM1x97zOv1TSALrWPP7+6odeVFV0rMbNGciKpoT/rkRT12nxcNz4IsXkgZF+31txIb2aN5kRSQ39WSBws6zfmP58b6c2s0ZxIajj09Ttxz3/13X7RSIPZhuPGfDMbCCeSGoZzEgH4j0GKr9KYX2mH8Yh7M8tLMcS3bRqto6MjFi5cmHv/Cb2sYDicjdthLM//6a9dtmcet2+X2saR+7Rx5+OruzTmd5+VGKC1pZnttt6qSy0FcM3FbJSRtCgiOnp8r+yJRNKxwMVAE3BJRFzQ1/6jIZGUxbgdxvLu/f6WaxYsZ2METRIfOmQPbl7cycuvbu6htuPWTew//nVdaoeH770zE9u23+LYjr12rmuS6+n2H5QnsZY9/kYYCbd8B+MaRmwikdQEPAG8G1gB3A98KCIe7e0YJ5KRbYygetb/luamQVsorPvtP4DmMQJ17SAxmOccTGWPvxF6+s7K9v0M1jX0lUjKPiDxYGBZRDwVEX8FrgVOaHBM1kDdl44ZzAkxZ89busVYnvWbYotedsN1Es6yx98IPX1nZft+huIayp5I2oHlVdsrUlkXks6QtFDSwtWrVw9ZcDY8DNZYm/58znAc31P2+Buht++hTN/PUFxD2RNJLhExJyI6IqKjra2tX8fuuHVTnaKyoTJYY2368znDcXxP2eNvhN6+hzJ9P0NxDWVPJJ3AHlXb41PZoHnonGOdTEpkjLputzQ35ZoQM48ZUyfT0tz1t9A8RjQ3dT3pYJ5zMJU9/kbo6Tsr2/czFNdQ9nEk9wOTJE0kSyCnAB8e7JM8dM6xW5S5Eb64svXaqnxOWXs9lT3+RujtOyvT9zMU11DqXlsAko4DLiLr/ntZRHy1r/3722vLzMz67rVV9hoJEXEbcFuj4zAzG63K3kZiZmYN5kRiZmaFOJGYmVkhTiRmZlZI6Xtt9Zek1cDTOXffFfhjHcOpN8ffWGWPH8p/DY5/8OwVET2O6B51iaQ/JC3srbtbGTj+xip7/FD+a3D8Q8O3tszMrBAnEjMzK8SJpG9zGh1AQY6/scoeP5T/Ghz/EHAbiZmZFeIaiZmZFeJEYmZmhTiR9EDSsZKWSlom6axGx5OHpMskrZL0SFXZzpLmS3oyPe/UyBj7ImkPSXdKelTSEkmfSeWluAZJ20i6T9KDKf5zUvlESQvSb+k6SWMbHWtfJDVJWizplrRdmvgl/UHSw5IekLQwlZXi91MhqVXSjZIel/SYpMPKcA1OJN1IagL+FXgPsC/wIUn7NjaqXK4Aui+cchZwe0RMAm5P28PVBuALEbEvcChwZvrey3INrwJHRcQBwIHAsZIOBb4OXBgRbwDWAKc3MMY8PgM8VrVdtviPjIgDq8ZelOX3U3Ex8MuI2Ac4gOy/xfC/hojwo+oBHAbMq9qeCcxsdFw5Y58APFK1vRTYLb3eDVja6Bj7cS0/A95dxmsAtgV+BxxCNip5q1Te5bc13B5kK4zeDhwF3AKoZPH/Adi1W1lpfj/A64DfkzpBlekaXCPZUjuwvGp7RSoro3ERsTK9fg4Y18hg8pI0AZgCLKBE15BuCz0ArALmA/8FrI2IDWmX4f5bugj4IrApbe9CueIP4FeSFkk6I5WV5vcDTARWA5en24uXSNqOElyDE8koEdmfM8O+r7ek7YGfAJ+NiJer3xvu1xARGyPiQLK/7A8G9mlwSLlJeh+wKiIWNTqWAt4WEQeR3ZY+U9I7qt8c7r8fsoUGDwK+HxFTgD/T7TbWcL0GJ5ItdQJ7VG2PT2Vl9Lyk3QDS86oGx9MnSc1kSeTqiJibikt1DQARsRa4k+xWUKukykqkw/m3dDjwfkl/AK4lu711MeWJn4joTM+rgJ+SJfMy/X5WACsiYkHavpEssQz7a3Ai2dL9wKTUW2UscApwc4NjGqibgenp9XSydodhSZKAS4HHIuJbVW+V4hoktUlqTa9byNp3HiNLKCel3YZt/BExMyLGR8QEst/8HRHxEUoSv6TtJO1QeQ0cAzxCSX4/ABHxHLBc0uRUdDTwKCW4Bo9s74Gk48juFzcBl0XEVxscUk2SrgGOIJt2+nlgFnATcD2wJ9nU+R+IiBcbFWNfJL0NuBt4mM336P8PWTvJsL8GSW8CriT7zYwBro+IcyW9nuwv/J2BxcBHI+LVxkVam6QjgH+KiPeVJf4U50/T5lbAv0fEVyXtQgl+PxWSDgQuAcYCTwGnkX5PDONrcCIxM7NCfGvLzMwKcSIxM7NCnEjMzKwQJxIzMyvEicTMzApxIrHSkfTpNDPq1f045mxJnWlm2AckXVDPGIcLSW9PsxE/kMa3VMpbJX0yx/FHVGYCNuvNVrV3MRt2Pgm8KyJW9PO4CyPiG729KakpIjYWC23Y+QhwfkT8uFt5K9n3+L2hD8lGGtdIrFQk/QB4PfALSZ8bhM/7g6SvS/odcLKkvSX9Mk38d7ekfdJ+EyX9Z1rv4iuSXknlXf5il/RdSaem12+WdFf6rHlV01z8Jp3zPklPSHp7Km+S9A1Jj0h6SNKnJB0l6aaqz3+3pJ/SjaSj00R/Dytbm2ZrSZ8APgCc10Pt7QJg71RTma3M7HTuhyV9sIdzvCWdY+80kvyydA2LJZ2Q9jlV0tz0HT4p6V+K/Pexkmj09MN++NHfBz1MF57jmLPJ5ol6ID2mVn3WF6v2ux2YlF4fQjZVCGTTVHwsvT4TeCW9PgK4per47wKnAs3AfwBtqfyDZLMkAPwG+GZ6fRzw6/T6f5HNr1SZtn1nsqncH6/6nH8Hju92bduQzVj9P9L2VWSTXkK2Ts1JPXwfE+i65MDfk81Y3EQ2u+wzZFOWH0E2pfxbgUXAnmn/r5GNcoesdvMEsF269qfIpkTfhmwk9h6N/s34Ud+Hb23ZaNLbra3r4LWZh98K3JBN/QXA1un5cLJ/bAF+RLbgU18mA28E5lkIpHAAAAInSURBVKfPagJWVr1fmZRyEdk/6gDvAn4Qadr2SNNgSPoR8FFJl5NNBPmxHs71+4h4Im1fSZbsLqoRY7W3AddEdmvveUl3AW8BXgb+DpgDHBMRz6b9jyGb5PGf0vY2ZFN4QLYI00sp9keBvei6NIONME4kNmKkf2inAM9GxHH9OPTP6XkM2fobB/ayX0/zCW2g6y3ibSrhAEsi4rBePqsyX9VGav9/eDnwc+C/gRti8/ogQ2Ul2XVNASqJRMDfR8TS6h0lHcLma4N812cl5zYSGzEi4rTIllntTxKpPv5l4PeSToZsRmJJB6S37yGbFReyBuyKp4F9U5tEK9mMrZCtatcm6bD0Wc2S9qsRwnzgfypN2y5p5xTXs2T/gH+ZLKl0txSYIOkNafsfgLtqnOtPwA5V23cDH0ztNG3AO4D70ntrgfcC56cJHQHmAZ9Sqm5JmlLjfDaCOZGYdfUR4HRJDwJLgBNS+WfIFkt6mKpVAiNiOdnMrI+k58Wp/K9k069/PX3WA2S3zfpyCVnbxEPpmA9XvXc1sDwiHut+UET8N9kssTek+DYBP+jrRBHxAnBPalyfTTZz7kPAg8AdZO1Gz1Xt/zzwPuBfU63jPLJ2oIckLUnbNkp59l+zAZD0SkRsP4Tn+y6wOCIuHapzmuXlRGI2AEOZSCQtImvHeXcMw7VAzJxIzMysELeRmJlZIU4kZmZWiBOJmZkV4kRiZmaFOJGYmVkh/x9AZvdC6btnfwAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"length = [len(item[0]) for item in sorted_token_count]\n",
"frequency = [item[1] for item in sorted_token_count]\n",
"plt.scatter(length, frequency)\n",
"\n",
"plt.xlabel('f - Frequency of token')\n",
"plt.ylabel('|r - rank of token')\n",
"plt.title('Zipf\\'s Law')"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"from random import choice"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('to', 4154),\n",
" ('bhi', 2929),\n",
" ('the', 2866),\n",
" ('https', 1310),\n",
" ('mein', 1276),\n",
" ('ek', 1165),\n",
" ('status', 1108),\n",
" ('on', 1071),\n",
" ('http', 905),\n",
" ('co', 872),\n",
" ('aap', 820),\n",
" ('log', 448)]"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"choose = sorted_token_count[:100]\n",
"lst = []\n",
"for i in range(10):\n",
" item = choice(choose)\n",
" choose.remove(item)\n",
" lst.append(item)\n",
"lst.sort(key=lambda x:x[1], reverse=True)\n",
"lst"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"meanings = {'to':3, 'bhi':2, 'the':3, 'https':1, 'mein':}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 3. Heaps' Law"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"vsize = 0\n",
"num_tokens = 0\n",
"unique_tokens = []\n",
"V = []\n",
"N = []\n",
"\n",
"for i in range(len(tokens)):\n",
" s = tokens[i]\n",
" if s not in unique_tokens:\n",
" unique_tokens.append(s)\n",
" vsize += 1\n",
" V.append(vsize)\n",
" N.append(i+1)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"N = np.array(N)\n",
"V = np.array(V)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0.5, 1.0, \"Heaps' Law\")"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZIAAAEWCAYAAABMoxE0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3dd3zV1f3H8deHjWxkGPZUQBliBNyoLSJqsdYqOOvCOqq2tRVbq3b82tq6994D92hFERVQVEYYyoawiUDYhE2Sz++P74leaSCXJDc3N3k/H4/7uN97vuvz5YZ88j3nfM8xd0dERKS4qiQ7ABERSW1KJCIiUiJKJCIiUiJKJCIiUiJKJCIiUiJKJCIiUiJKJCIiUiJKJCKAmS0xsx/tUfYLMxufrJhCDHt90KuwmEWSQYlERERKRIlEJE5m1sLM3jSzNWa22Myui1nXx8y+MrONZrbSzB40sxox693MrjOzRWa21sz+bWZVwrpOZjbOzDaFda+WMM5GZvbfEOeGsNwqrDvRzGbEbDvazCbHfP7czM4syfml8lEiEYlD+KX/H+BroCVwMnCDmZ0SNskDfg00AY4K66/e4zA/BdKB3sBg4NJQ/lfgI6AR0Ap4oGAHd7dihFsFeAZoC7QBtgMPhnUTgM5m1sTMqgM9gBZmVs/Maof4Pi/GOaUSq5bsAETKkXfMLDfmcw1galg+Emjq7n8JnxeZ2RPAEGCUu0+J2W+JmT0GnADcG1N+h7uvB9ab2b3AUOBJYDfRL/0W7r4CKFG7jLuvA94s+Gxm/weMCeu2hzuQ44FviRLjRuAYYCewIOwvEjfdkYh870x3b1jw4od3FG2J/nLfWPAC/gA0BzCzg0MV0ioz2wz8nejuJNbymOWlQIuw/HvAgElmNsvMLqUEzOwAM3vMzJaGWD4DGppZ1bDJOKA/UTIZB4wlSnonhM8i+0WJRCQ+y4HFsYnG3eu5+6Cw/hFgLtDZ3esTJZk9q6Vaxyy3IbojwN1XufsV7t4CuBJ42Mw6lSDW3wKHAH1DLMeH8oJ49kwk41AikRJQIhGJzyQgx8xuMrPaZlbVzA4zsyPD+nrAZmCLmXUBrirkGL8LDeGtgeuBVwHM7OcFjeHABsCB/Djjqm5mtWJe1UIs24GNZtYYuG2Pfb4kSjR9gEnuPovojqsv0d2LyH5RIhGJg7vnAacDvYDFwFqi9o0GYZMbgfOAHOAJQpLYw7vAFGA68D7wVCg/EphoZluA94Dr3X1RnKGNJEoaBa/bidplaocYJwAf7nEtW4nafma5+65Q/BWw1N2z4zyvyHdME1uJJF54sLCzu2cmOxaR0qY7EhERKRElEhERKRFVbYmISInojkREREqk0j3Z3qRJE2/Xrl2ywxARSSlTpkxZ6+5NC1tX6RJJu3btyMjISHYYIiIpxcyW7m2dqrZERKRElEhERKRElEhERKRElEhERKRElEhERKRElEhERKRElEhERKRElEhERCq4FRu28cRni/gic21Cjl/pHkgUEakMMrO3MHr2aj6YuZJvVmwC4Kr+HTmm054zQJecEomISAWxKzefUbNW8cKEpUxavB6A7i0bcNPALpzWPY02Bx6QkPMqkYiIpLDcvHy+WLiO97/5lv9+s5Jtu/Jo0/gAbj61Cz/p1YK0BrUTHoMSiYhICpr97WZenLiUj2atYu2WXdSuXpUBhzZncK8W9D+4GVWqWJnFokQiIpIisnN28OKEZbz/zbcsXLOVmtWq8ONuzTm9Rxr9D2lGrepVkxKXEomISDm2Oy+fMXOzeWXSMsZnrmV3nnNc5yac37ctZ/VuScMDaiQ7RCUSEZHyKGvjdt6asoIRk5eTtXE7zerV5NJj2nPuka3p0LRussP7ASUSEZFywt35bMFanv9yCWPnryEv3+nTvjG3ntGNk7s0o1rV8vnonxKJiEiS7czN4z9fr+ThsZksWrOV5vVrctmx7TmvTxvaNamT7PCKpEQiIpIkC9ds4dXJy3ktYzkbt+2mW1p9/v7T7vzsiJbUrJachvPiUCIRESlDO3Pz+GRONo+MXciMrE1UrWIM6Nacc49szXGdm1K1DLvtlhYlEhGRMjBn5WaeGr+YD2euYsvOXFo0qMUtp3XlJz1b0Kx+rWSHVyJKJCIiCbJp+25GzVzFG1NWMGnJeurUqMqp3dMY1P0gju/ctNw2nu+vhCYSM6sFfAbUDOd6w91vM7P2wAjgQGAKcKG77zKzmsDzwBHAOuBcd18SjnUzcBmQB1zn7qNC+UDgPqAq8KS7/zOR1yQiUpTM7Bzu+XgBH85cRV6+0/bAAxh+aheGHNm6XDz3UdoSfUeyEzjJ3beYWXVgvJl9APwGuMfdR5jZo0QJ4pHwvsHdO5nZEOAO4Fwz6wYMAQ4FWgAfm9nB4RwPAT8GVgCTzew9d5+d4OsSEfmB3Lx8Pp6TzbNfLmbCouju49Jj2nFajxb0bNUAs9Rr+4hXQhOJuzuwJXysHl4OnAScF8qfA24nSiSDwzLAG8CDFv3rDwZGuPtOYLGZZQJ9wnaZ7r4IwMxGhG2VSESkTKzbspMRk5fzzBdLWLtlJwfVr8WNAw5maJ82HFi3ZrLDKxMJbyMxs6pE1VediO4eFgIb3T03bLICaBmWWwLLAdw918w2EVV/tQQmxBw2dp/le5T3TcBliIh8Jy/f+XjOal7PWMFn89ewKy+fYzs14YJ+h/GjruX3wcFESXgicfc8oJeZNQTeBrok+px7MrNhwDCANm3alPXpRaSCWLlpO29NzeKlCUv5dtMOmtaryYVHtWXIka3p3LxessNLmjLrteXuG81sDHAU0NDMqoW7klZAVtgsC2gNrDCzakADokb3gvICsfvsrTz23I8DjwOkp6d7qV2UiFR423flMW7+Gt6cuoJP5qwm3/l+2JKuzaleye4+CpPoXltNgd0hidQmahS/AxgDnE3Uc+ti4N2wy3vh81dh/afu7mb2HvCymd1N1NjeGZgEGNA59ALLImqQL2h7EREptuXrt/Hk54t4c2oWW3bmcmCdGlxxfAfOTS9/gyYmW6LvSNKA50I7SRXgNXf/r5nNBkaY2d+AacBTYfungBdCY/p6osSAu88ys9eIGtFzgWtClRlmdi0wiqj779PuPivB1yQiFZS780XmOp4cv4ix89ZgBmf0aME56a3p26Gx7j72wqKOVZVHenq6Z2RkJDsMESlHNm3bzetTlvPypGUsWrOVA+vU4Px+bTknvRWtGiVmnvNUY2ZT3D29sHV6sl1EKq3l67fxxOeLeHtqFjk7c+ndpiH/OrsHg3u1SKlBE5NNiUREKpUdu/P47zcreWPKciYsWk/1qsag7mlcfmwHurdqkOzwUpISiYhUCvNW5fDKpGW8Oz2LDdt207pxbW4ccDA/7d2Klg1rJzu8lKZEIiIV1sZtu3hrahZvTl3BrG83U72qMeDQgzi/TxuO6nhghR62pCwpkYhIhZKzYzdj563hlUnLmLBoHfkO3dLqc+vp3Rjcq0WlGbakLCmRiEiFMH91Dg9+msnIGSvJzXdaNqzN1f07MfCwgzispdo+EkmJRERSVn4Y82rE5OWMmZdNnRrVuKBfWwYedhBHtmuckrMNpiIlEhFJObvz8nn/m5U8OCaTzOwtNK1XkyuP78jlx7WniaquypwSiYikjGnLNvD2tCzenpZFzo5cDm5el/uHHs6gww6qdCPulidKJCJS7n2zYiP/HjWPzxespWa1Kgw49CAG92zBSV2aUUXVV0mnRCIi5dbcVZt5eMxC3vv6WxoeUJ0bBxzML45pT92a+tVVnujbEJFyZ9Li9Tzx+SJGz15NrepVuKp/R67u35F6taonOzQphBKJiJQL+fnOmHnZPDpuIZOXbKDhAdW59sROXHZsexrVqZHs8GQflEhEJKl27M7jnWlZvDRxGTOyNpHWoBa3nt6NoX3aULuGBk5MBUokIpIUm7bv5sUJS3nuyyVk5+ykfZM6/OvsHvykZwtqVVcCSSVKJCJSpjZs3cULE5byzBeL2bBtN8d1bsLd5/TimE4a+ypVKZGISJlYvn4b94yez3+/WcmuvHyO7dSEmwZ20dDtFYASiYgk1LxVOTw2biHvfv0t1asa5xzZigv7teOQg+olOzQpJUokIpIQo2ev5qExmUxfvpE6NapyYb+2XH5ce01dWwEpkYhIqZq8ZD33fbyA8Zlr6dC0Dn8Y1IWf9W6l4dsrMCUSESmx/HxnfOZaHvtsIV9krqNJ3ZrcfGoXLjmmPTWqaQysii6uRGJmvwJedPcNCY5HRFKIuzNyxioeGpPJ7JWbaVK3Jn8Y1IXz+7aljoYxqTTi/VOhOTDZzF4zs4EWRx89M2ttZmPMbLaZzTKz60P57WaWZWbTw2tQzD43m1mmmc0zs1NiygeGskwzGx5T3t7MJobyV81Mj7+KlIHcvHxGzVrF6Q+M55qXp5Kzczf/PrsHXww/kWHHd1QSqWTM3ePbMEoeA4BLgHTgNeApd1+4l+3TgDR3n2pm9YApwJnAOcAWd79zj+27Aa8AfYAWwMfAwWH1fODHwApgMjDU3Web2WvAW+4+wsweBb5290f2dR3p6emekZER1zWLyA+5O6Nnr+av789m+frttGl8AL86qRNn9W6lSaQqODOb4u7pha2L+88Gd3czWwWsAnKBRsAbZjba3X9fyPYrgZVhOcfM5gAt93GKwcAId98JLDazTKKkApDp7ovCxYwABofjnQScF7Z5Drgd2GciEZH9l5cfJZCnv1jMpMXr6dCkDg+d15sfd2uuNhCJu43keuAiYC3wJPA7d99tZlWABcD/JJI99m8HHA5MBI4BrjWzi4AM4Leh7aUlMCFmtxV8n3iW71HeFzgQ2OjuuYVsv+f5hwHDANq0aVP0BYsIADtz83hzShbPfrmY+au30Lx+TW4/oxtD+rTRMCbynXjvSBoBZ7n70thCd883s9P3taOZ1QXeBG5w981m9gjwV8DD+13Apfsd+X5w98eBxyGq2krkuUQqgm27cnl18nIeHruQNTk76ZpWn7vP6ckZPVtQXTMRyh6KTCRmVhUY4u63F7be3efsY9/qREnkJXd/K2y/Omb9E8B/w8csoHXM7q1CGXspXwc0NLNq4a4kdnsRKYZtu3J55oslPDpuITk7cunbvjF3n9OTYzs10ThYsldFJhJ3zws9ptq4+7J4Dxwa558C5rj73THlaaH9BOCnwMyw/B7wspndTdTY3hmYBBjQ2czaEyWKIcB5oc1mDHA2MAK4GHg33vhE5HuL1mzhsXGLeH/GSrbszOVHXZtz5QkdSG/bSAlEirQ/VVuzzGwSsLWg0N1/so99jgEuBGaY2fRQ9gdgqJn1IqraWgJcGY41K/TCmk3UmH+Nu+cBmNm1wCigKvC0u88Kx7sJGGFmfwOmESUuEYnT4rVbuf+TBbw9LYsa1arwk54tGHJka9LbNU52aJJC4ur+a2YnFFbu7uNKPaIEU/dfEcjauJ2Hx2QyYvJyqlc1LjqqHZcf155m9WolOzQpp0rc/TcVE4aI/K/szTt4ZNxCXpq4jPx8Z8iRrbn+5M40q68EIsUXb/fffsADQFegBlEV01Z3r5/A2ESklCxeu5UXvlrKaxnL2bYrl58e3opf/7izRuKVUhFvG8mDRI3crxM91X4R3z91LiLlVGb2Fh4em8k707IwMwZ1T+P6kzvRqZnmApHSsz9PtmeaWdXQAP6MmU0Dbk5caCJSXNk5O3hq/GKe/HwxBlxyTHuuPKGD2kAkIeJNJNvCgIjTzexfREOf6KkkkXJm3qocnh6/mHemZ7E7L58zerbgT6d3o4nmApEEijeRXEjULnIt8GuiBwR/lqigRCR+7s64+Wt4/LNFfLlwHbWqV+HMXi254vgOdGpWN9nhSSUQb6+tgqFRtgN/Tlw4IhKv/Hznk7nZPDI2k6nLNpLWoBY3DjiY8/u2pVEdzaggZWeficTMZhA9OFgod+9R6hGJyD7l5zsfzlrFPaPnsyB7C60a1eYvgw9laJ82GgdLkqKoO5J9DsgoImUnNy+fd6Z/y5OfL2Luqhw6NKnDXT/vyeBeLaimBCJJtM9EsudovyKSHJ/OXc0/P5jL/NVb6HJQPf51dg/OOrylEoiUC/E+kJjD91VcNYDq6IFEkYT7bP4a7v9kARlLN9C+SR3uG9KLn/RsoYEUpVyJt7H9u6eXwqi+g4F+iQpKpLLLWLKeez6ezxeZ62jRoBa3nt6NC/q11WyEUi7F/UBiAY9GeXzHzG4Dhpd+SCKVU8F86E9+vphJS9ZzYJ0a3HJaVy48qi01q2k2Qim/4q3aOivmYxWiYVJ2JCQikUpoxopN/OW/s5i8ZANpDWrxx0FdOb9fGw6osd9/64mUuXh/Ss+IWc4lmkdkcKlHI1LJLF23lQc+zeTNqSs4sE4N/nrmYQw9srUa0SWlxNtGckmiAxGpTL5evpGHx2by0ezVVDXj8mPbc+2JnWlwQPVkhyay3+Kt2uoA3EfUwO7AV8Cv3X1RAmMTqXCyc3bwz5FzeWtaFg0PqM7V/TtyQb+2pDWonezQRIot3qqtl4GHiOZYh2hI+VeAvokISqSi2b4rj6fGL+KRsQvZnef88oSOXH1iR+rX0h2IpL54E8kB7v5CzOcXzex3iQhIpCLZuG0Xz3yxhJcmLmXtll2c1KUZfxjUVYMpSoVS1FhbjcPiB2Y2HBhBVLV1LjAywbGJpKzsnB28MnE5T41fxOYduZzUpRnDju9Avw4HJjs0kVJX1B3JFKLEUfAY7ZUx6xxNbCXyA1t25vLsF4t5ZOxCtu7K4+QuzfjNgIM5tEWDZIcmkjBFjbXVviQHN7PWwPNAc6LE87i73xfudF4F2hF1JT7H3TeEp+bvAwYB24BfuPvUcKyLgVvCof/m7s+F8iOAZ4HaRHdJ14eHJkXKTG5ePi9NXMa9H89nw7bdnNSlGTef2oXOzTWlrVR8cT/tZGaHAd2A7+bqdPfni9gtF/itu081s3rAFDMbDfwC+MTd/xmqzIYDNwGnAp3Dqy/wCNA3JJ7biB6E9HCc99x9Q9jmCmAiUSIZCHwQ73WJlNS0ZRv407szmZm1mX4dGjP81K70at0w2WGJlJl4u//eBvQnSiQjiX7hjye629grd19JNC0v7p5jZnOAlkQPM/YPmz0HjCVKJIOB58MdxQQza2hmaWHb0e6+PsQzGhhoZmOB+u4+IZQ/D5yJEomUgayN2/m/92czcsYqmtStyQNDD+f0HmkaUFEqnXjvSM4GegLT3P0SM2sOvLg/JzKzdsDhRHcOzUOSAVhFVPUFUZJZHrPbilC2r/IVhZTvee5hwDCANm3a7E/YIv8jNy+ft6Zm8fcP5rBjdx7XntiJX/bvSN2aGs5EKqd4f/K3u3u+meWaWX0gm2je9riYWV3gTeAGd98c+xebu7uZJbRNw90fBx4HSE9PV/uJFNvSdVu5+qWpzPp2M4e3aci/z+5Bp2ZqB5HKLd5EkmFmDYEniHpybSF6ur1IZladKIm85O5vheLVZpbm7itD1VV2KM/ihwmqVSjL4vuqsILysaG8VSHbi5Sq3Lx8nv1yCf8aNY/a1aty35BenNGjBVWqqBpLJN6xtq4Oi4+a2YdE7RLfFLVf6IX1FDDH3e+OWfUecDHwz/D+bkz5tWY2gqixfVNINqOAv5tZo7DdAOBmd19vZpvNrB9RldlFwAPxXJNIvCYsWsef/zObOSs3c1znJvzjrO60anRAssMSKTfibWz/KfCpu29y9yWhEfxMd3+niF2PAS4EZpjZ9FD2B6IE8pqZXQYsBc4J60YSdf3NJOr+ewlASBh/BSaH7f5S0PAOXM333X8/QA3tUkqWrtvKPaPn8870b0lrUEuN6SJ7YfE8cmFm09291x5l09z98IRFliDp6emekZGR7DCkHNu2K5d7P17AM18sxjAuO64915/cmVrVNbmUVF5mNsXd0wtbF28bSWGTI6iLilQ4Y+Zm8+f/zGLJum2cfUQrbhxwCAc1qFX0jiKV2P40tt9NNAIwwDVEje4iFcLqzTu4Z/R8RkxeTqtGtXn58r4c3alJssMSSQnxJpJfAX8iGtYEYDRRMhFJaXn5zhOfL+Ke0fPJzXeGHd+B351yCNU1Q6FI3OLttbUVGB6GOXF335LYsEQS76uF6/jHB3P4ZsUmftS1GX86vRttD6yT7LBEUk68vba6Ew2H0jh8Xgtc7O4zExibSEKs27KTez6ez4sTltGyYW3u/HlPfta7pXpjiRRTvFVbjwG/cfcxAGbWn+hJ8aMTFJdIqdu2K5cnPlvMU+MXkbMzl0uOacfvT+lC7RrqjSVSEvEmkjoFSQTA3ceameoAJCW4O58vWMst78xk2fpt/Khrc24aeIiGeBcpJfEmkkVm9iegYLrdC4BFiQlJpPTMW5XD396fzecL1tKm8QG8fEVfju6o3lgipSneRHIp8GegYKysz0KZSLmUnbODf304jzenrqB+reoMP7ULlxzTjprVVI0lUtriTSTt3f26hEYiUgrcnXemZ3Hru7PYuTufy49tz1X9O9G4To1khyZSYcWbSO4ys4OAN4BX1VtLyqNVm3bwuze+5vMFa+ndpiH/OrsnnZrVTXZYIhVevM+RnBgSyTnAY2FOklfd/W8JjU4kDrvz8nnhq6Xc+/F8dubmc8tpXbnkmPZU1RDvImUi7vGy3H0VcL+ZjQF+D9wKKJFIUs3+djM3vv41s1dG86X//afd6dBUdyEiZSneBxK7AucCPwPWEQ2V8tsExiWyTzk7dnPnqHm8NHEZ9WtX59ELejPwsLRkhyVSKcV7R/I0MAI4xd2/TWA8IkV6e9oK/u/9uazdspOhfVrzu1O6qDFdJInibSM5KtGBiBQlO2cHv3/jG8bOW0PP1g158uJ0erVumOywRCo9zSki5V5uXj4vTFjKHR/OJd/hD4O6cNmxHdSYLlJOKJFIufbZ/DX8feQc5q7K4bjOTbjtjEPVpVeknNnnpAtm9kJ4v75swhGJ7Nidx63vzuSipyexbVce9w3pxfOX9lESESmHirojOcLMWgCXmtnzwA/qEtx9fcIik0pr/IK13PruTBat3cpFR7XlD4O6ar50kXKsqETyKPAJ0IFoat3YROKhXKRU5OU7j45byJ0fzaN1owN47tI+nHBw02SHJSJF2GfVlrvf7+5dgafdvYO7t495FZlEzOxpM8s2s5kxZbebWZaZTQ+vQTHrbjazTDObZ2anxJQPDGWZZjY8pry9mU0M5a+amfqApqiJi9ZxxgPj+feoeQzo1pxRNxyvJCKSIuKamNrdrzKznmZ2bXj1iPP4zwIDCym/x917hddIADPrBgwBDg37PGxmVc2sKvAQcCrQDRgatgW4IxyrE7ABuCzOuKSc2J2XzwOfLGDoExPYsG0XD553OI9ecIQmmxJJIXElEjO7DngJaBZeL5nZr4raz90/A+JtRxkMjHD3ne6+GMgE+oRXprsvcvddRA9GDrZoXtSTiAaSBHgOODPOc0k5kJmdw08e/IK7Rs/n1MPS+PCG4zm9RwtNeSuSYuLt/ns50NfdtwKY2R3AV8ADxTzvtWZ2EZAB/NbdNwAtgQkx26wIZQDL9yjvCxwIbHT33EK2/wEzGwYMA2jTpk0xQ5bSkpuXz5PjF3P3R/M5oGZVHjm/N6d21/AmIqkqrjsSokb2vJjPeezRg2s/PAJ0BHoBK4G7inmcuLn74+6e7u7pTZuq3j2ZZqzYxJkPf8E/P5jL8Qc35aMbjlcSEUlx8d6RPANMNLO3w+czgaeKc0J3X12wbGZPAP8NH7OA1jGbtgpl7KV8HdDQzKqFu5LY7aWccXdenrSMW9+dReM6NXhg6OGc3iNN1VgiFUC8Y23dbWZjgWND0SXuPq04JzSzNHdfGT7+FCjo0fUe8LKZ3Q20ADoDk4jufDqbWXuiRDEEOM/dPQxpfzZRu8nFwLvFiUkSa9GaLdz67izGZ67luM5NePC83jSoXT3ZYYlIKdmf+UimAlP35+Bm9grQH2hiZiuA24D+ZtaL6DmUJcCV4fizzOw1YDaQC1zj7nnhONcCo4CqRF2RZ4VT3ASMMLO/AdMo5l2SJEZuXj73f5rJo+MWUrNaFf78k0O5sF9bqmiMLJEKxdw92TGUqfT0dM/IyEh2GBXe6s07+O1rXzM+cy0DujXnr2ceRvP6tZIdlogUk5lNcff0wtZp0EYpdV8uXMt1r0wjZ0cud/ysO+ceqZ5yIhVZ3InEzNoCnd39YzOrDVRz95zEhSapxt157LNF3DlqHm0aH8BLl/fjkIPqJTssEUmweKfavYLoOYzGRF13WxGNw3Vy4kKTVBI76dQphzbnX2f3VIO6SCUR7x3JNURPmE8EcPcFZtYsYVFJShk7L5sbX/+azTty+eOgrlx2bHs1qItUIvEmkp3uvqugz7+ZVSPqdSWV2KZtu/nr+7N5Y8oKuhxUjxcu60vXtPrJDktEyli8iWScmf0BqG1mPwauBv6TuLCkvBszN5ub3vyGdVt38csTOnL9yZ010KJIJRVvIhlONLLuDKLnPka6+xMJi0rKrbVbdnLzWzMYPXs1nZrV5amLj6R7qwbJDktEkijeRHI+0ci83yUPMzvd3f+7j32kgvly4Vp++9rXrN+6i9+dcgiXHdteMxeKSNyDNj4AfG5mXWPK/pKAeKQccneeGr+YC56cSK3qVXnzqqO55sROSiIiAsR/R7KYqGrrDTO73d1fp/ij/0oK+Xbjdm55Zyafzs3m5C7NuG/o4dStqedYReR78f5GcHefamYnAK+YWV+ica+kApu+fCO/fGEKm3fs5qaBXbjy+A7q1isi/yPeqq2VAO6+FjiFqOvvYYkKSpLL3XktYznnPvYVVasYb151NFf176gkIiKFincY+dNilvOB34WXVDAbt+3iuhHT+Wz+Go5s14hHLjiCJnVrJjssESnH9plIzOxed7/BzP5DIQ8guvtPEhaZlLkJi9Zx4+tfk715J7ed0Y0L+7WlWtV4b1pFpLIq6o7khfB+Z6IDkeQaMWkZt7wzk7SGtXjx8r70ad842SGJSIrYZyJx9ynhfRyAmVUnahvJcvfsxIcnibYzN487Rx71KqgAABRySURBVM3jic8Xc2ynJjx8QW/q19JgiyISv33WW5jZo2Z2aFhuAHwNPA9MM7OhZRCfJNDaLTsZ+vgEnvh8Mef3bcNTv0hXEhGR/VZU1dZx7v7LsHwJMN/dzzSzg4APgFcSGp0kzKxvN3HFcxms3bqLB887nNN7tEh2SCKSoopKJLtiln8MvA7g7qsKRgKW1DN+wVqGvZBBg9rVefOXR2usLBEpkaISyUYzOx3IAo4herq9YBj52gmOTUqZu/Pw2IXc9dE8Ojaty3OX9qFFQ32NIlIyRSWSK4H7gYOAG9x9VSg/GXg/kYFJ6dqxO49b3pnJG1NWcFqPNP5xVne1h4hIqdhnY7u7z3f3ge7ey92fjSkf5e6/LergZva0mWWb2cyYssZmNtrMFoT3RqHczOx+M8s0s2/MrHfMPheH7ReY2cUx5UeY2Yywz/2m+rZCbdq+m6FPTOCNKSu49sROPDj0cCURESk1iX7a7Flg4B5lw4FP3L0z8En4DHAq0Dm8hgGPQJR4gNuAvkTT/d5WkHzCNlfE7LfnuSq95eu38fNHv2Rm1iYePO9wbjzlEJRvRaQ0JTSRuPtnwPo9igcDz4Xl54AzY8qf98gEoKGZpRGN7TXa3de7+wZgNDAwrKvv7hPc3Ym6JZ+JfGfasg389OEvWblpB8/8oo96ZolIQiRj/Ivm7r4yLK8CmofllsDymO1WhLJ9la8opPx/mNkwM8sws4w1a9aU/ApSwIczV3HOY19Rq3oV3rrqaI7t3CTZIYlIBbXficTMSm1WxHAn8T9jeJU2d3/c3dPdPb1p06aJPl1SuTv3fjyfX744hUMOqse71xxD5+b1kh2WiFRgxbkjKfSv/v2wOlRLEd4LhlrJAlrHbNcqlO2rvFUh5ZVWfr5zyzszuffjBZx1eEve+OXRHKiRe0UkwYqTSKaV8JzvAQU9ry4G3o0pvyj03uoHbApVYKOAAWbWKDSyDwBGhXWbzaxf6K11UcyxKh1350/vzuSlicsYdnwH7vx5T02FKyJlYr/nTHX3S+Pd1sxeAfoDTcxsBVHvq38Cr5nZZcBS4Jyw+UhgEJAJbCMakgV3X29mfwUmh+3+4u4FDfhXE/UMq000ZMsH+3s9FYG78/eRc3hp4jKuPKEDwwd2Uc8sESkzFjVTVB7p6emekZGR7DBKTX6+89f3Z/PMF0s4v28b/nbmYUoiIlLqzGyKu6cXtm6/70ik/MgLbSKvTFrGL45ux21ndFMSEZEyp0SSonLz8vnj2zN5NWM5V/XvyO/1oKGIJElRU+1eFOdxprv7N6UQj8Rhx+48fvPadEbOWMXV/Tvy+4Fdkh2SiFRiRd2RtI/zOEtKGIfEKWfHbi56ehLTlm3kpoFduKp/x2SHJCKVXFGJ5O/uvrtMIpEirduyk4ufmcSclTk8fH5vBnVPS3ZIIiJFPkeSZWZPmtnJGlk3uRav3cpZj3zJgtVbePzCI5RERKTcKCqRdCV6fuMWYLmZ3RceFpQylJm9hSGPf0XOjlxevqIvJ3dtXvROIiJlpKj5SNa5+2PufiLREO6LgHvMbKGZ/V+ZRFjJLV+/jUufnUxevvPKFf04om3jZIckIvIDcQ+R4u7fAk8RzQGSA1yeqKAksnbLTs597Cs2bNvFExelc8hBGnxRRMqfIhOJmdUys5+b2VtEw5ecRDQZlSa3SKBN23Zz2XMZrNu6ixcv68vhbRoVvZOISBIU9RzJy8CPgHHAS8B57r6jLAKrzDbv2M0FT01k7qrNPHReb3q2bpjskERE9qqo7r8fAle6e05ZBCPRw4bDns9g7qrNPHbhEZzURQ3rIlK+FZVIxgIFw7fvy0Z331w6IVVeu3LzueL5DCYuXs895/RSEhGRlFBUInmuiPUQzXD4LNGc6VJM7s5Nb37D5wvWcsfPunPm4SWdP0xEpGzsM5GEbr+SYO7OXR/N5+1pWfzmxwdz7pFtkh2SiEjcijNDopSyR8Yt5MExmZyT3opfndQp2eGIiOwXJZIkGzVrFf8eNY/TeqRxx896aCh4EUk5SiRJlJm9hV+/Op0erRpy59k9lUREJCUpkSTJhq27uOy5ydSsVoVHL+hN7RpVkx2SiEixaIbEJNiZm8eVL05h5cYdvHxFX9Ia1E52SCIixaZEkgT/GDmXSYvXc9+QXqS30yCMIpLaVLVVxsbNX8OzXy7hgn5tGNxLz4qISOpLWiIxsyVmNsPMpptZRihrbGajzWxBeG8Uys3M7jezTDP7xsx6xxzn4rD9AjO7OFnXE4/M7Byue2UahzSvx/BTuyY7HBGRUpHsO5IT3b2Xu6eHz8OBT9y9M/BJ+AxwKtA5vIYRDWWPmTUGbgP6Es2Xclscw7kkxcZtu7jsuQyqV63C4xcdQd2aqlUUkYoh2YlkT4P5fliW54AzY8qf98gEoKGZpQGnAKPdfb27bwBGAwPLOuii5Oc7v3nta7I2bOfh83vT9sA6yQ5JRKTUJDOROPCRmU0xs2GhrLm7rwzLq4CCUQtbAstj9l0RyvZW/gNmNszMMswsY82aNaV5DXF59sslfDo3mz+e1pU+7dW4LiIVSzLrV4519ywzawaMNrO5sSvd3c3MS+NE7v448DhAenp6qRwzXovWbOGfH87l5C7N+MXR7cry1CIiZSJpdyTunhXes4G3ido4VocqK8J7dtg8C2gds3urULa38nIhP9/507szqVG1Cv84q7ueXBeRCikpicTM6phZvYJlYAAwE3gPKOh5dTHwblh+D7go9N7qB2wKVWCjgAFmVjBnyoBQVi68MnkZX2Su4+ZBXWhWv1aywxERSYhkVW01B94Of6FXA1529w/NbDLwmpldBiwFzgnbjwQGEc0Zvw24BMDd15vZX4HJYbu/uPv6sruMvVu6biv/HDmXvu0bc14fDQsvIhVXUhKJuy8CehZSvg44uZByB67Zy7GeBp4u7RhLIj/fuX7EdDC48+cajFFEKjY9zJAAL05cyvTlG7n33F60bnxAssMREUmo8vYcScpbv3UXd46ax9EdD2RwrxbJDkdEJOGUSErZP0bOYfvuPG7/yaGq0hKRSkGJpBSNmZfN61NWcOmx7Tm4eb1khyMiUiaUSErJ9l153PL2TDo3q8uvf3RwssMRESkzamwvJY99tpCsjdsZMawftaprtkMRqTx0R1IKlq/fxqPjFjKo+0H063BgssMRESlTSiSl4O8j5wBwy2ndkhyJiEjZUyIpoclL1vPBzFVc3b8TLRpq7nURqXyUSEro7o/m06RuDa44rkOyQxERSQolkhL4fMEavlq0jqv6d6J2DTWwi0jlpERSTO7OXR/Np2XD2pzfV4MyikjlpURSTF9krmP68o1cc2IndfcVkUpNiaSYnvliMU3q1uRnR/zPzL4iIpWKEkkxLFidw6fzsjmvT2tqVtPdiIhUbkokxfDSxGVUq2L84pj2yQ5FRCTplEj207Zdubw5ZQWndU+jcZ0ayQ5HRCTplEj203vTvyVnZy4X9Gub7FBERMoFJZL99Na0LDo1q8sRbRslOxQRkXJBiWQ/rNq0g8lL1nNa9zRNWiUiEiiR7Id3p2fhDmf1VpdfEZECFSKRmNlAM5tnZplmNjxR5/lkTjbd0urT9sA6iTqFiEjKSflEYmZVgYeAU4FuwFAzK/Xx3LftymXqsg2ccEjT0j60iEhKS/lEAvQBMt19kbvvAkYAg0v7JNOWbSQ33+nbvnFpH1pEJKVVhETSElge83lFKPuOmQ0zswwzy1izZk2xTlKjWhV+1LUZPVs1LH6kIiIVUKWYs93dHwceB0hPT/fiHOPIdo05sp3uRkRE9lQR7kiygNYxn1uFMhERKQMVIZFMBjqbWXszqwEMAd5LckwiIpVGyldtuXuumV0LjAKqAk+7+6wkhyUiUmmkfCIBcPeRwMhkxyEiUhlVhKotERFJIiUSEREpESUSEREpESUSEREpEXMv1vN5KcvM1gBLi7l7E2BtKYaTTBXlWnQd5Yuuo/wprWtp6+6FDjZY6RJJSZhZhrunJzuO0lBRrkXXUb7oOsqfsrgWVW2JiEiJKJGIiEiJKJHsn8eTHUApqijXousoX3Qd5U/Cr0VtJCIiUiK6IxERkRJRIhERkRJRIomTmQ00s3lmlmlmw5MdTwEzW2JmM8xsupllhLLGZjbazBaE90ah3Mzs/nAN35hZ75jjXBy2X2BmF8eUHxGOnxn2tVKK+2kzyzazmTFlCY97b+co5eu43cyywncy3cwGxay7OcQ0z8xOiSkv9OcrTI8wMZS/GqZKwMxqhs+ZYX27El5HazMbY2azzWyWmV0fylPqO9nHdaTid1LLzCaZ2dfhWv5c3POX1jXulbvrVcSLaHj6hUAHoAbwNdAt2XGF2JYATfYo+xcwPCwPB+4Iy4OADwAD+gETQ3ljYFF4bxSWG4V1k8K2FvY9tZTiPh7oDcwsy7j3do5Svo7bgRsL2bZb+NmpCbQPP1NV9/XzBbwGDAnLjwJXheWrgUfD8hDg1RJeRxrQOyzXA+aHeFPqO9nHdaTid2JA3bBcHZgY/v326/yleY17jbU0filU9BdwFDAq5vPNwM3JjivEsoT/TSTzgLSwnAbMC8uPAUP33A4YCjwWU/5YKEsD5saU/2C7Uoi9HT/8BZzwuPd2jlK+jtsp/JfWD35uiObQOWpvP1/hF8laoNqeP4cF+4blamE7K8Xv5l3gx6n6nRRyHSn9nQAHAFOBvvt7/tK8xr29VLUVn5bA8pjPK0JZeeDAR2Y2xcyGhbLm7r4yLK8CmoflvV3HvspXFFKeKGUR997OUdquDVU+T8dU1ezvdRwIbHT33D3Kf3CssH5T2L7EQpXI4UR/Aafsd7LHdUAKfidmVtXMpgPZwGiiO4j9PX9pXmOhlEhS37Hu3hs4FbjGzI6PXenRnxQp18e7LOJO4DkeAToCvYCVwF0JOEdCmFld4E3gBnffHLsulb6TQq4jJb8Td89z915AK6AP0CXJIRVKiSQ+WUDrmM+tQlnSuXtWeM8G3ib6YVttZmkA4T07bL6369hXeatCyhOlLOLe2zlKjbuvDr8A8oEniL6T4lzHOqChmVXbo/wHxwrrG4Tti83MqhP98n3J3d8KxSn3nRR2Han6nRRw943AGKJqpv09f2leY6GUSOIzGegcejLUIGrIei/JMWFmdcysXsEyMACYSRRbQW+Zi4nqiQnlF4UeN/2ATaFKYRQwwMwahVv+AUR1oiuBzWbWL/SwuSjmWIlQFnHv7RylpuCXYvBTou+k4NxDQu+a9kBnogboQn++wl/nY4CzC4k39jrOBj4N2xc3ZgOeAua4+90xq1LqO9nbdaTod9LUzBqG5dpEbT1zinH+0rzGwpVWo1ZFfxH1UplPVEf5x2THE2LqQNTT4mtgVkFcRHWcnwALgI+BxqHcgIfCNcwA0mOOdSmQGV6XxJSnE/2nWwg8SOk1Hr5CVMWwm6gO9rKyiHtv5yjl63ghxPlN+E+cFrP9H0NM84jpAbe3n6/wHU8K1/c6UDOU1wqfM8P6DiW8jmOJqpS+AaaH16BU+072cR2p+J30AKaFmGcCtxb3/KV1jXt7aYgUEREpEVVtiYhIiSiRiIhIiSiRiIhIiSiRiIhIiSiRiIhIiSiRSMoyMzezu2I+32hmt+/nMZ61aFTYmuFzEzNbUkrx9Tez/5bGsYo4T9MwUus0Mztuj3U3mNkBcRxjS+IilIpOiURS2U7gLDNrUsLj5BE9+1CumFnVODc9GZjh7oe7++d7rLuBaMA/kYRRIpFUlks0H/WvS3ice4FfxwwJAfzvHYWZPWhmvwjLS8zsHxbmgTGz3mY2yswWmtkvYw5T38zet2jOh0fNrErYf4CZfWVmU83s9TA2VMFx7zCzqcDP94innZl9atHAg5+YWRsz60U0DPvgEEvtmO2vA1oAY8xsTCgbatGcIDPN7I49/yHCHdlXZnZa+Pw7M5sczvnnmDjmmNkTFs2T8VHBec3sOovmAvnGzEYU7+uQVKNEIqnuIeB8M2tQgmMsA8YDF+7vfh4NqPc58CzRkBL9gD/HbNMH+BXRnBAd+f4O6hbgRx4NuJkB/CZmn3Xu3tvd9/xF/ADwnLv3AF4C7nf36cCtRHNP9HL37QUbu/v9wLfAie5+opm1AO4ATiIavPBIMzuzYHszaw68T/QE9ftmNoBoOI0+Yfsj7PtBQTsDD7n7ocBG4GehfDhweIgxNqFKBVat6E1Eyi9332xmzwPXAduL2n4f/kE0ntD7+7FPwXhrM4gmIMoBcsxsZ8EYScAkd18EYGavEA3hsYMosXwRDQ1FDeCrmOO+upfzHQWcFZZfILoT2R9HAmPdfU2I5yWiibneIZo46RPgGncfF7YfEF7Twue6RAlkGbA4JDGAKURzskA0nMdLZvZOOK5UAkokUhHcSzTpzzOFrTSzUURzXGS4++WFbePuCyya9+GcmOJcfnjXXmuP3XaG9/yY5YLPBf+39hyDyInGqRrt7kMLvRrYupfyRMolSginAAWJxIB/uPtjsRtaNM9H7PXmAQVVaqcRJaczgD+aWXf/fl4LqaBUtSUpz93XE00Netle1p8Sqn0KTSIx/g+4MebzUqBbGDW1IVGj9v7qE0ZXrQKcS1SFNgE4xsw6wXejOB8cx7G+JBqhFeB8oiq1ouQQTTkL0SB8J4R2kKpEsxQWJA0n6nDQxcxuCmWjgEtj2m9amlmzvZ0oXGNrdx8D3EQ0jHndOGKUFKc7Eqko7gKuLckB3H1WaOTuHT4vN7PXiEZeXcz3VTz7YzLRSLediIbmftvd80Oj/SsF3Y6J2kzmF3GsXwHPmNnvgDXAJXGc/3HgQzP7NrSTDA9xGPC+u383PLi755nZUOA9M8tx94fNrCvwVaiC2wJcQHQHUpiqwIuhvcqI2nA2xhGjpDiN/isiIiWiqi0RESkRJRIRESkRJRIRESkRJRIRESkRJRIRESkRJRIRESkRJRIRESmR/wdnK1o3o/TkYAAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plt.plot(N, V)\n",
"plt.xlabel('N - Number of tokens')\n",
"plt.ylabel('|V| - Size of vocabulary')\n",
"plt.title('Heaps\\' Law')"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1000 610\n",
"10000 3597\n",
"20000 5920\n",
"30000 7753\n",
"50000 10806\n",
"100000 16675\n"
]
}
],
"source": [
"for i in [1000, 10000, 20000, 30000, 50000, 100000]:\n",
" print(i, V[i-1])"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"from math import log"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1.3188109468386284\n",
"0.6824809994294719\n",
"1.3976147883124628\n",
"0.655343979207257\n",
"1.5589214096765935\n",
"0.6405478197637083\n"
]
}
],
"source": [
"print(8385/6358)\n",
"print(log(1.3188, 3/2))\n",
"print(11719/8385)\n",
"print(log(1.397614, 5/3))\n",
"print(18269/11719)\n",
"print(log(1.558921, 10/5))"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"10.340940789558791"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"11719 / 50000 ** 0.65"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0.5, 1.0, \"Heaps' Law\")"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZIAAAEWCAYAAABMoxE0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3dd3hVVdbA4d9Ko/cmHUQQsYAaimJvCBbsYsWKY++KM/qBozPqjH1QEBRFpYoNlSIgTZQSeodQE1qAFBICqev74+yMVyaQG+69uSnrfZ775Jx92jremMXZe5+9RVUxxhhjjlVEuAMwxhhTtlkiMcYYExBLJMYYYwJiicQYY0xALJEYY4wJiCUSY4wxAbFEYowxJiCWSIwBRGSLiFxyWNldIvJruGJyMRzxRa/CYjYmHCyRGGOMCYglEmP8JCJNRORrEdkjIptF5DGfbV1E5HcRSRWRnSIySERifLariDwmIptEZK+I/FtEIty2E0RkloikuW1jA4yzjoj86OJMccvN3LYLRWSFz75TRWShz/ocEbkmkOubiscSiTF+cH/0fwCWAU2Bi4EnRKSH2yUPeBKoD5zltj902GmuBWKBM4DewD2u/BXgZ6AO0Az4T8EBqirHEG4E8CnQEmgBHAQGuW3zgLYiUl9EooHTgCYiUkNEqrj45hzDNU0FFhXuAIwpRb4TkVyf9RhgsVvuDDRQ1b+79U0iMgzoA0xR1UU+x20RkY+A84F3fcrfUNVkIFlE3gVuAT4GcvD+6DdR1UQgoHYZVd0HfF2wLiL/AGa4bQfdE8h5wA68xJgKdAeygA3ueGP8Zk8kxvzhGlWtXfDhz08ULfH+5Z5a8AH+CjQCEJF2rgppl4jsB/6J93TiK8FneSvQxC0/BwiwQERWicg9BEBEqorIRyKy1cUyG6gtIpFul1nABXjJZBYwEy/pne/WjSkWSyTG+CcB2OybaFS1hqr2ctsHA2uBtqpaEy/JHF4t1dxnuQXeEwGquktV71fVJsADwIcickIAsT4NnAh0dbGc58oL4jk8kczCEokJgCUSY/yzAEgXkedFpIqIRIrIKSLS2W2vAewHMkSkPfBgIed41jWENwceB8YCiMiNBY3hQAqgQL6fcUWLSGWfT5SL5SCQKiJ1gQGHHfMbXqLpAixQ1VV4T1xd8Z5ejCkWSyTG+EFV84ArgU7AZmAvXvtGLbfLM8CtQDowDJckDvM9sAhYCvwEfOLKOwPzRSQDmAA8rqqb/AxtIl7SKPgMxGuXqeJinAdMPuxeDuC1/axS1WxX/DuwVVWT/LyuMf8lNrGVMaHnXixsq6rx4Y7FmGCzJxJjjDEBsURijDEmIFa1ZYwxJiD2RGKMMSYgFe7N9vr162urVq3CHYYxxpQpixYt2quqDQrbVuESSatWrYiLiwt3GMYYU6aIyNYjbbOqLWOMMQGxRGKMMSYglkiMMcYExBKJMcaYgFgiMcYYExBLJMYYYwJiicQYY0xALJEYY0x5l5kMc9+DLQHN4nxEFe6FRGOMqTB2LIWFw2DFeMg9BN2fgFbnBP0ylkiMMaY8yc2C1d/DgmGQuACiq0LHW6DL/dDo5JBc0hKJMcaUB2mJEPcpLB4BB/ZA3TZw+eteEqlSO6SXtkRijDFllSpsmQMLhsLaiaD50O5y7+nj+AshIoJt+zKZvHAjk1bu4qrTmnDPOa2DHoYlEmOMKWuy0mHZGFj4MexZC1XqwNmPQOw9UKcViSmZjJsez7TVu1m9cz8ApzStSd1qMSEJxxKJMcaUFXvWeclj6WjITofGnaD3h3DKdWRqNFNX7+b77xcyY10SApzZsg4v9GxPz1Ma06Je1ZCFZYnEGGNKs7xcWD/Zq77aPAsiY+Dka6FLPw7U78ic+H38MG4NM9clcSA7j/rVY3jkwhPo06UFTWtXKZEQLZEYY0xplJkMS77wel+lJUDNpnDRS2R3vINFe6P4ceEOvl/6CxlZudSrFsNVHZtwxWmNObtNfSIjpERDtURijDGlye7VsOAjWDYWcg9Cy3Ogxz/YXP8Cvl++m1GDVpCUnkVMVARXnNqYG85sRpfWdYmODN/75ZZIjDEm3PLzYP0UmD8YNs+GqMpw6o3kxN7PpL0NGPXrVuZt8t5KP69dA165pgVntalHzcrRYQ7cY4nEGGPC5WAqLPnSa/9I3epVX108gPjm1zNqxQG+G76d5AOJtKxXlWcua8e1ZzQrsXaP4rBEYowxJW3POpj/ESwbDTmZ0OJsci9+man5sXw6L5EFP60kQuDSDo244czmXNS+YYm3exSHJRJjjCkJ+fkQPxXmDYZNMyCyEpx6I6mn3s3YxDqMnZLApr3LaVijEi9ecRJXd2pCwxqVwx21XyyRGGNMKB1Kg6WjvCeQlM1QozF64Yv8Ur0XX689xLThSWTn7aZj89p8eNsZXNqhUVgbzo+FJRJjjAmFvRu8to+loyA7A5p3ZUfss4xL78S3C5LYum8L9avHcGvXFtzatQXtGtUId8THzBKJMcYES34+bPzF630VPw0iY8g/+Trm1b+BD9fX5Ncf9iKyhc4t6/LYRW3p3akJUWXs6aMwlkiMMSZQ2ZmwfIzX/rF3PVRvRGKnJxmWeT7fLs9m/6FsjquZwXOXn8hNsc2pX71SuCMOqpAmEhGpDMwGKrlrjVfVASLSGhgD1AMWAXeoaraIVAI+B84E9gE3q+oWd64XgHuBPOAxVZ3iyi8H3gMigY9V9fVQ3pMxxvxX+i7vzfO44XAwGT2uI4vPfJ23d5zM3Hnp1KyczUXtG3JVxyac365BuXj6KEyon0iygItUNUNEooFfRWQS8BTwjqqOEZEheAlisPuZoqoniEgf4A3gZhHpAPQBTgaaANNEpJ27xgfApUAisFBEJqjq6hDflzGmItu5DH7/EFZ+Dfm5HGxzOV/HXM276xuwd0s2TWvn8uIVJ3Fb15ZUiYkMd7QhF9JEoqoKZLjVaPdR4CLgVlc+AhiIl0h6u2WA8cAgERFXPkZVs4DNIhIPdHH7xavqJgARGeP2tURijAmu/Hxv8MR5H8KWOWh0Nba2vplh2ZcxZk00qsoFJ9bm1i4tuKh9QyJK8XsfwRbyNhIRicSrvjoB7+lhI5Cqqrlul0SgqVtuCiQAqGquiKThVX81Beb5nNb3mITDyrsWEkM/oB9AixYtAr8pY0zFkZXhuu8OhuRN5FZvwqzmjzIg4UwSV8VQr1oM953TjNu7taR53dAN1V6ahTyRqGoe0ElEagPfAu1Dfc1CYhgKDAWIjY3Vkr6+MaYMSkv03v1YPAIOpZFc5zRG1OrPoN0nI8lRXNqhEa92bs65bRuU6rfOS0KJ9dpS1VQRmQGcBdQWkSj3VNIM2O522w40BxJFJAqohdfoXlBewPeYI5UbY0zxJS6CeR+gq74DlMXVzuXfeRczb+cJtKxXlScubcZ1Z5bOMa/CJdS9thoAOS6JVMFrFH8DmAHcgNdzqy/wvTtkglv/3W3/RVVVRCYAo0TkbbzG9rbAAkCAtq4X2Ha8BvmCthdjjPFPXi6s/dFr/0iYT3ZUdX6IuYp39l9IZkRTrjizMY+dehzdWterUG0f/gr1E0ljYIRrJ4kAxqnqjyKyGhgjIq8CS4BP3P6fAF+4xvRkvMSAqq4SkXF4jei5wMOuygwReQSYgtf9d7iqrgrxPRljyousdFj8hdf+kbqN5JgmDOUuvsw4h9ZNG/PAhc254YxmFaLnVSDE61hVccTGxmpcXFy4wzDGhNP+nbDgIzRuOHIojbUxHXgn4zJmSRcuPaUJd53dkjNa1MHrNGoARGSRqsYWts3ebDfGVBxJa+C3QejysWh+HrMiuvJeVk92VzqFu3q04o3OzaldNSbcUZY5lkiMMeWbKmyZQ96v7xG5cRpZUokxORcwPK8XLduewn2xzeh5SuMK3/MqEJZIjDHlU14uuvo7Mme8Q7XklaRSi09zbuSXGlfS69yTGXNmMxrXsp5XwWCJxBhTruih/eyZ/QkxcUOonb2LXfmNGUE/Dp10A1fHtuGpNtbzKtgskRhjyoW0pG1smPAWJyZ+RUMOsDC/Pb8f9xDHdb6G509rSrVK9ucuVOy/rDGmTNu0Oo606W9zyt7JnE4+i6qew77T+tH53B50LmfDtZdWlkiMMWWO5uezdv5ksma9Q6dDCzioMcytdSVNej1Nl/Ydwx1ehWOJxBhTZmzds5/VM8bQeu1QTsrfwD5qMa/Vg7S74nEuaNA43OFVWJZIjDGl3pbdycz7bjBddnxBT9nJrsjGzOvwIh2vfIhuVauFO7wKzxKJMabUStiZxMof3uP07SPpIynsqHoi+y58meNib+C4CBu2pLSwRGKMKXVWb9hI4uS36br3W3rKATbWOJOUHs/T5JTLwIYtKXUskRhjSgVVZeGSJaROf4fzMibRnlzW1jmfnJ7P0+bEs8MdnjkKSyTGmLA6lJPHrNkzqLzgP3Q/NBuVCDY0voIWV/WnQ9MO4Q7P+MESiTEmLFIysvjl5+9otGIIPXQxmVRmw/F30OrKZ+lQr3nRJzClhiUSY0yJStqfydyfRtJ63VCuZz37I2qz5bSnaNnjMU6qWifc4ZljYInEGFMi1u9M4ddvh3DOri+4NmI7e6KOY2fXV2h8/n3UjKka7vBMACyRGGNCJj9fidu4kzWTh3LBnpHcE5FEUrU2JJ0ziIbdboFI+xNUHvj1LYrIo8CXqpoS4niMMeWAqjJ+3np2zhjKjVnf0EWS2VXzJNIu/jcNO14NERHhDtEEkb//HGgELBSRxcBwYIpWtDl6jTFFys9XpizewPZpg7jm4LfUl/3sqXcmhy4dynHtL7F3QMopvxKJqr4oIi8BlwF3A4NEZBzwiapuDGWAxpjSLzM7l5/mryZz9iCuyf6BWpLJ7obdye/1Nxq07h7u8EyI+f186Z5AdrlPLlAHGC8i/ypsfxFpLiIzRGS1iKwSkcdd+UAR2S4iS92nl88xL4hIvIisE5EePuWXu7J4EenvU95aROa78rEiYpMtG1OCMrNz+XTKPMa/dg89p11K35yxHGxyFnn3zaDRwxOJsCRSIfjbRvI4cCewF/gYeFZVc0QkAtgAPFfIYbnA06q6WERqAItEZKrb9o6qvnnYNToAfYCTgSbANBFp5zZ/AFwKJOJVsU1Q1dXAG+5cY0RkCHAvMNjfmzfGHJu9GVn8NGcBleYP4hadTozkkdz6Cqr1fIHjGp0c7vBMCfO3jaQOcJ2qbvUtVNV8EbmysANUdSew0y2ni8gaoOlRrtEbGKOqWcBmEYkHurht8aq6CUBExgC93fkuAm51+4wABmKJxJiQyczO5aufZ1N1wfvcIrOJEEhtdz31L+9P/Xptwh2eCZMiE4mIRAJ9VHVgYdtVdY0f52gFnA7MB7oDj4jInUAc3lNLCl6SmedzWCJ/JJ6Ew8q7AvWAVFXNLWR/Y0wQ7c3IYsLUGTRePojb838lLzKazJPvoPYlz1C/tr2FXtEVmUhUNc+1T7RQ1W3FvYCIVAe+Bp5Q1f0iMhh4BVD38y3gnuKet5gx9AP6AbRo0SKUlzKmXIlPSufbKb/QYcMQ7pLfyY6oRNIp99H48meJqdEo3OGZUqI4VVurRGQBcKCgUFWvPtpBIhKNl0RGquo37pjdPtuHAT+61e2A7z9tmrkyjlC+D6gtIlHuqcR3/z9R1aHAUIDY2FjrtmzMUagqi7elMmn6L3TcPJSnI+aTE1WZ1I4PUfeSp2lcrV64QzSljL+J5KXinlhEBPgEWKOqb/uUN3btJwDXAivd8gRglIi8jdfY3hZYAAjQVkRa4yWKPsCtqqoiMgO4ARgD9AW+L26cxhhPfr4yf3MyE36eSvcdw/lrxAJyo6twqPNjVD3vcSpZAjFH4O97JLOO4dzdgTuAFSKy1JX9FbhFRDrhVW1tAR5w11jl3k1Zjdfj62FVzQMQkUeAKUAkMFxVV7nzPQ+MEZFXgSV4icsYUwyHcvIYF5fA1Bkz6HNwFK9FLiA7phq5XZ8k5pxHialaN9whmlJO/HlBXUS6Af8BTgJi8P6gH1DVmqENL/hiY2M1Li4u3GEYE3ZZuXmMnr+NaTOmc1vWGHpGLiQnqjp0fYDo7o+AJRDjQ0QWqWpsYdv8rdoahFel9BUQi/dOSbujHmGMKZWycvP4etF2pv7yM30yR/NlZBy5lavD2c8T3e1BqGJDuZvi8XvoTVWNF5FIV930qYgsAV4IXWjGmGDKycvn+6U7mDx1MjcdGMWnkYvIqVwDPft5oiyBmAD4m0gy3fAjS92QKDspxvAqxpjwSTuYw+e/bWHBvFnceWgUH0cuIqdyTbT7C0R3/QtUqR3uEE0Z528iuQOvXeQR4Em87rjXhyooY0zgsnLz+GDGRubMncN9eWN4NHIBOZVrkH/2C0Sf9SBUrhXuEE054W+vrYKhUQ4CL4cuHGNMoDKychk9fxtT5vzKbYfG8ETkb2jlqnDWs0Sf9bBVYZmgO2oiEZEVeN10C6WqpwU9ImPMMYlPyuDz37fwW9xi+ul4xkbNgZgYIro9Bmc/DvYeiAmRop5ICh2Q0RhTeixNSOW9aetZs24tj0V/x4DImUhkFBGd/wLnPAnVG4Y7RFPOHTWRHD7arzGmdFBVZq7bw0ezN7Jx0yaeqPwDw6pMJ1IUOeMuOO8ZqNkk3GGaCsLf+UjS+aOKKwaIpoy+kGhMWZaXr8xcl8T70zeQkJjAU1Un0qfqz0RqDtLpVjjvWajTMtxhmgrG38b2GgXLbgyt3kC3UAVljPmz/Hxl8qpdvPXzOvbs2c1T1X7mtmoTico7iJx2M5z/HNh8ICZM/H4hsYCbcvc7ERkA9C9qf2PMsVNV5mzYy1tT17M+YRdP1ZzBXTW+IzonHU6+Fi54ARqcGO4wTQXnb9XWdT6rEXjDpBwKSUTGGFSVufH7+PeUtaxO3Md9Vecwpta3VMnaC+0uh4tehONODXeYxgD+P5Fc5bOcizdqb++gR2NMBZeXr/yyNokhszayeOs+7qy2kC/rfk2NzERodDZcMhJaWK2yKV38bSO5O9SBGFORFbSB/HvKOjbvzeC66qsYVn88dTPWQ41T4dp34YRLQCTcoRrzP/yt2joeeA+vgV2B34EnVXVTCGMzpkJYsi2F1yauZcGWZK6pu43xTcZRL3kxRLeG6z+Bk6+DCBvazpRe/lZtjQI+wJvRELwh5UcDXUMRlDHlXX6+MnvDHobP3cKcDXvoWmUHvzafQLM9syHiOLjibTjjToiMDneoxhTJ30RSVVW/8Fn/UkSeDUVAxpR3v23cy6s/rmH1zv2cXj2FSc0mcOKen5H0mnDJQOjyAMRUDXeYxvitqLG2CqZImyQi/fHmRlfgZmBiiGMzplzZsvcA/5qylokrdnFq7SymnTiZNglfIynR3lAm3R+zARVNmVTUE8kivMRR0ML3gM82xSa2MqZI63en8+nczYyLS6RWZBaj2v7GWbtGIglZcEZf72XCGseFO0xjjllRY221LqlAjClv1u7az3vTNjBp5S6qRClvHb+Uq1JGEJmQBCdd7VVj2dvophzw+812ETkF6ABULihT1c9DEZQxZdnqHfv558Q1/Bq/l2oxEbzTcQdX7/mIyMQN0Lwb9BkJzbuEO0xjgsbf7r8DgAvwEslEoCfwK3DURCIizd0+jfCqwoaq6nuu7WUs0Arv5cabVDXFjeP1HtALyATuUtXF7lx9gRfdqV9V1RGu/EzgM6CKi+1xN4yLMSUqPimDf09Zy5RVu6lVJZo3z8qm954hRK+bB/VOgJtHQvsr7F0QU+74+0RyA9ARWKKqd4tII+BLP47LBZ5W1cUiUgNYJCJTgbuA6ar6umvE7w88j5eg2rpPV2Aw0NUlngF4Q7OoO88EVU1x+9wPzMdLJJcDk/y8L2MCdiArl3enrWf43C1UjorgpbOrcPuBT6m0ZAJUa2BdeU25528iOaiq+SKSKyI1gSS8eduPSlV3AjvdcrqIrAGa4g2vcoHbbQQwEy+R9AY+d08U80Sktog0dvtOVdVkAJeMLheRmUBNVZ3nyj8HrsESiSkBOXn5jF6wjfenx7M3I4u7O9XguaoTqLL0My9pnP88nP0oVKpR5LmMKcv8TSRxIlIbGIbXkysD7+12v4lIK+B0vCeHRi7JAOzCq/oCL8kk+ByW6MqOVp5YSPnh1+4H9ANo0aJFccI2plDrdqXz3NfLWZaQytktqvJ9xyU0XfkRZGd4Tx8XvGA9sUyF4e9YWw+5xSEiMhnvKWC5vxcRkerA18ATqrpffOqIVVVFJKRtGqo6FBgKEBsba+0n5phlZufyyo+rGbswgWqVIvmq+3ZiN7yHLEqEE3vBxQOgYftwh2lMifK3sf1a4BdVTVPVLa7K6RpV/c6PY6PxkshIVf3GFe8WkcaqutNVXSW58u38ucqsmSvbzh9VYQXlM115s0L2NybofovfywvfriAhOZO/npbBXfuHErUoDo47Da4dAq3PDXeIxoSFvyPBDVDVtIIVVU3Fa/w+KtcL6xNgjaq+7bNpAtDXLfcFvvcpv1M83YA0VwU2BbhMROqISB3gMmCK27ZfRLq5a93pcy5jgmJ76kEeG72EWz+eT/28JOa1G8196/oRlZ4AvT+AfjMtiZgKzd82ksISjj/HdgfuAFaIyFJX9lfgdWCciNwLbAVuctsm4nX9jcfr/ns3gKomi8grwEK3398LGt6Bh/ij++8krKHdBEn6oRw+m7uFD2bGU1kPMbrNLLrtHo1sx5sbvfsTUKl6uMM0JuzEn1cuRGQ4kIo3AjDAw0BdVb0rdKGFRmxsrMbFxYU7DFOK5ecrIxds492p60k5cIgBzZZye+bnRGYmwSk3eG+k1y6y06Ix5YqILFLV2MK2+ftE8ijwEt5LhABT8ZKJMeXKqh1p/N/3q1i0NYW7miTwXK0RVN27Gpp1hltGQfPO4Q7RmFLH315bB4D+7qVCVdWM0IZlTMlKP5TDsDmbGfTLBtpX2suvLb+j2e5foGYzb3KpU663N9KNOQJ/e22dijfUSV23vhfoq6orQxibMSGnqnwVl8g/J60hOzOdj5pM45K08UhyNFz0Ipz1CERXCXeYxpRq/lZtfQQ8paozAETkArz3Ms4OUVzGhNzyxFTemLyWufF7eaLhMh6qPIKY5N3Q8RavHcReKDTGL/4mkmoFSQRAVWeKSLUQxWRMSKVl5vDBzHg+nrOJblUSmddoFMelLYXGneCWL21kXmOKyd9EsklEXgIKptu9HdgUmpCMCY1DOXkMn7uZYbM3oZnJjGoyka4pPyA5deHq/0Cn2yHC31erjDEF/E0k9wAvAwVvps92ZcaUeqrKnA17+b/vV5KwL53/a7yA26I/Jyolw5sf/YL+UKV2uMM0pszyN5G0VtXHQhqJMSGwcnsar01aw9z4fVxZaxM/Nh5J9ZQ10Po86PkvaHhSuEM0pszzN5G8JSLHAeOBsdZby5R2iSmZvDF5HT8s28GJVfYzrcW3nJA0BSo3h5s+96a6te68xgSFv++RXOgSyU3AR25OkrGq+mpIozOmmPLylVHzt/LapLVEaA6fnTif83d8giTnw/n9ofvjEFM13GEaU674PWe7qu4C3heRGcBzwP8BlkhMqbFxTwZPj1vG0oRU7mm+i/75Q4nZuhba9YSer0OdVuEO0Zhyyd8XEk8CbgauB/bhDZXydAjjMsZvGVm5fDgjnk9+3cxxURnMOOFHWid+B7WaQ5/R0L5XuEM0plzz94lkODAG6KGqO0IYjzHFMmv9Hv76zQp2ph3gH80XcXPap0TsOADnPAXnPQMx9rqTMaHmbxvJWaEOxJjiSEjOZOCEVUxfm8RldXYxpekIqictg1bnwhVvQYMTwx2iMRWG320kxpQGuXn5vP9LPB/N2kgNyWTC8ZM5ded4JKI+XPcxnHqD9cYypoRZIjFlxqKtyTw9bhlb9h1gYOu13JE2hMid+6Dz/XDhX+2lQmPC5KiJRES+UNU7RORxVX2vpIIyxte+jCzemLyWcXGJdKyeyqJWo6m3cw40OQNu/wqanB7uEI2p0Ip6IjlTRJoA94jI58Cf6gx8prs1JuiycvMYNnsTQ2ZtIjsnm0/azueiXZ8gyRHeW+md74OIyHCHaUyFV1QiGQJMB44HFvHnRKKu3Jig27Qng2fHL2fR1hTuPT6NZ7M/oHLCSu+dkCvehFrNwh2iMcY5aiJR1ffxXkIcrKoPllBMpgLLz1fem76BIbM2Uisqm6kdptN28xdQrYENbWJMKeVv998HRaQjcK4rmq2qy0MXlqmIEpIz+dt3K5m9fg/PttnGA+mDiNqUCGfe7U00ZY3pxpRKfk2+ICKPASOBhu4zUkQe9eO44SKSJCIrfcoGish2EVnqPr18tr0gIvEisk5EeviUX+7K4kWkv095axGZ78rHikiMf7dtSpP8fOXtqeu55O1ZbNq8iV9afcHD2/sTFVMN7p4MV71rScSYUkxUteidRJYDZ6nqAbdeDfhdVU8r4rjzgAzgc1U9xZUNBDJU9c3D9u0AjAa6AE2AaUA7t3k9cCmQCCwEblHV1SIyDvhGVceIyBBgmaoOPlpMsbGxGhcXV+Q9m5KxYXc6L3yzgrityQxosYK+aUOIyM2Ec5+Bc56AqErhDtEYA4jIIlWNLWybv++RCJDns57HYT24CqOqs0WklZ/X6A2MUdUsYLOIxOMlFYB4Vd0EICJjgN4isga4CLjV7TMCGAgcNZGY0iEvX/l07mb+NWUdLaNS+b3FaBonzYLmXb3ZCu3NdGPKDH8TyafAfBH51q1fA3wSwHUfEZE7gTjgaVVNAZoC83z2SXRlAAmHlXcF6gGpqppbyP5/IiL9gH4ALVq0CCBsEwxrdu6n/zcrWJaQwoCmi+mbPpSI5Fzo8Rp0fcC69BpTxvjb2P62iMwEznFFd6vqkmO85mDgFbzuw68AbxHiaXtVdSgwFLyqrVBeyxxZTl4+b/68jo/nbKZdpRTmN/+SRnt+g5bdvaeQem3CHaIx5hgUZz6SxcDiQC+oqrsLlkVkGPCjW90ONPfZtZkr4wjl+4DaIhLlnkp89zelzJa9B3ju6+Us3LyXt45fwrV7hiCpQPwxtpUAABkjSURBVK83IfZeiPCr34cxphQq8bG2RKSxqu50q9cCBT26JgCjRORtvMb2tsACvLaYtiLSGi9R9AFuVVV1k2zdgDfEfV/g+5K7E+OPQzl5fDgjnsGzNnJC1F7mN/2ChjsWQOvz4er3bbIpY8qBkCYSERkNXADUF5FEYABwgYh0wqva2gI8AKCqq1wvrNVALvCwqua58zwCTAEigeGquspd4nlgjIi8CiwhsHYbE2Qrt6fxwBeL2JF6gH+3XMD1+4Yh6VFw5btw5l32YqEx5YRf3X8BRKQl0FZVp4lIFSBKVdNDGl0IWPff0FNVPvl1M/+avI6Tqu3n0zqfUXf3b9DmYrjqPajdvOiTGGNKlYC7/4rI/Xi9nuoCbfDaI4YAFwcrSFM+7D+Uw/PjlzNp5U7+2mwF96V/6PXIuvId7w11ewoxptzxt2rrYbx3OuYDqOoGEWkYsqhMmbQ8MZVHRi0hI2UX05uPp82ead57IdcMth5ZxpRj/iaSLFXNFvevSRGJwmvjMIbcvHw+nLmRd6et5+oqy/l3rY+J3pfmjY919mP2Xogx5Zy/iWSWiPwVqCIilwIPAT+ELixTVsQnpdP/6xWs2bqDLxp+Q/f9P0GtU+Da7+G4U8IdnjGmBPibSPoD9wIr8HpZTVTVYSGLypR62bn5DJ65kUEzNnB29AYW1B1K1fQdcM6TcMELNkaWMRWIv4nkNrxxsP6bPETkSlX98SjHmHJq3qZ9vPLjatbuSOE/TabSM+VLpFIL6DMJWnQLd3jGmBLm7+vE/wHmiMhJPmV/D0E8phTLys3jtUlr6DN0HjHpCcQ1eYteyZ8jp/WBv/xqScSYCsrfJ5LNeFVb40VkoKp+hR+j/5ryY0fqQf7y5SKWJ6bxetu13Jz0DnJA4PpP4NQbwh2eMSaM/E0kqqqLReR8YLSIdMV7y9yUc6rK1NW76f/NCqJy0pnb7juabvve69Z73TCo0zLcIRpjwszfRLITQFX3upkL3wCsS045l5Wbx9++Xcn4RYn0qrudd6sNIiYhAc7vD+c9C5ElPlSbMaYU8ncY+St8lvOBZ93HlFO70g7x4MhFLN2WzBftfuOcxKFIjcZw10RoeVa4wzPGlCJHTSQi8q6qPiEiP1DIC4iqenXIIjNhoap8FZfIPyetoUpOCgtajKDBtrlw8rXeYIs2d7ox5jBFPZF84X6+edS9TLmw/1AOr/ywmq8WJXJH4+0MyH6TqH2pNlqvMeaojppIVHWR+zkLQESi8dpGtqtqUujDMyVlw+507v88jq37MvjshLmcv30oUqcl3D4eGp8W7vCMMaXYUd8jEZEhInKyW64FLAM+B5aIyC0lEJ8pAbPX7+GGIb8TeSiZJW2GcUHiYKTD1dBvliURY0yRinoh8VyfSaTuBtar6qnAmcBzIY3MhFxevvL6pLXcOXwBF1TZyJQqf6P2rt/hirfghk+hcs1wh2iMKQOKaiPJ9lm+FPgKQFV3idWXl2lpB3N4YswSZqxL4oPWv9Fr1xCkdnO4dSo06RTu8IwxZUhRiSRVRK7Emyu9O97b7QXDyFcJcWwmRBJTMuk7fAF7k5OZ2Wo0rXb+DCddBb0/gMq1wh2eMaaMKSqRPAC8DxwHPKGqu1z5xcBPoQzMhMbMdUk8PW4Zx+Vt57f6g6i2eyNc8jJ0f9x6ZRljjklRvbbWA5cXUj4FmBKqoEzwqSqf/baFV35czR111/B/Oe8SmR0Nt38DbS4Md3jGmDLM39F/j4mIDBeRJBFZ6VNWV0SmisgG97OOKxcReV9E4kVkuYic4XNMX7f/BhHp61N+poiscMe8L9ZwU6icvHyeHreMv/+wkncaTuLlA68QWe946DfTkogxJmAhTSTAZ/zvE01/YLqqtgWmu3WAnkBb9+kHDAYv8QADgK5488YPKEg+bp/7fY77n6enii7tYA73jYhj2pJ1TG88mN5pX0DHW+GeKTbgojEmKEKaSFR1NpB8WHFvYIRbHgFc41P+uXrmAbVFpDHQA5iqqsmqmgJMBS5322qq6jxVVbz3W67B/FdCciY3DP6N7fHL+bXuPzg+bT70ehOu+RCira+EMSY4ip1IRCTQWREbqepOt7wLaOSWmwIJPvslurKjlScWUm6ARVuT6fX+HI7fP5/J1QZSkwPQ90focr81qhtjgupYnkiC9sfaPUn8z2CQwSYi/UQkTkTi9uzZE+rLhd3I+VvpM/R37on+mSHyOlF1WsD9v9iovcaYkDiWRLIkwGvudtVSuJ8FY3ZtB5r77NfMlR2tvFkh5f9DVYeqaqyqxjZo0CDA8EsvVeXNKesY8O1SPqozkidzPkba9YB7rT3EGBM6xU4kqnpPgNecABT0vOoLfO9TfqfrvdUNSHNVYFOAy0SkjmtkvwyY4rbtF5FurrfWnT7nqnBUldcnr+XLGUuYXO8dLsr4Cc55Em4eCZVqhDs8Y0w5FtIp7kRkNHABUF9EEvF6X70OjBORe4GtwE1u94lALyAeyMQb2wtVTRaRV4CFbr+/q2pBA/5DeD3DqgCT3KfCUVXembqen2fPZVrNd6h3KAmu/Qg69gl3aMaYCkC8ZoqKIzY2VuPi4sIdRtDk53tPInFzJvNF1XeoWika6TMKWnQNd2jGmHJERBapamxh22zS7TIsL1958buVpMSNZ2zlD4mq1Ry5fTzUPT7coRljKpCiptq908/zLFXV5UGIx/gpNy+f/t+soObSYfwz5kto2hm5ZQxUqxfu0IwxFUxRTySt/TzPlgDjMMVwICuXJ0Yvolv8O9wbPQnaXwnXf2wvGRpjwqKoRPJPVc0pkUiMX5IPZHPXsDk8sO91rohaAF0fhB7/gIjIcIdmjKmgikok20VkAjAa+EUrWst8KZOYkskDH8/kxfRXOStyFfT4J5z1cLjDMsZUcEW9R3ISXrfbF4EEEXnPveNhStjyxFT6DprC6xkv0i1yLVw71JKIMaZUOGoiUdV9qvqRql6IN/LuJuAdEdkoIv8okQgNi7el8OTHk/g4/yVOjkpE+oyEjjeHOyxjjAGK8Wa7qu4APsEbuj0duC9UQZk/rEhM4+XPfuBLeYmW0SlE3P41nNgz3GEZY8x/FZlIRKSyiNwoIt/gvXV+Ed4cIk1CHVxFF5+UwcBhYxme/xINK+URcdeP0PrccIdljDF/UtR7JKOAS4BZwEjgVlU9VBKBVXRb9h7g1Y/H8om8TPVqNYi8awI0aBfusIwx5n8U1WtrMvCAqqaXRDDGs3XfAV4aMpJBuQOpUq0WUff+ZG+rG2NKraISyUygYNTdo0lV1f3BCali25F6kFeHjuKD3IFUqV6b6Ht+grr+vhdqjDElr6hEMqKI7eBNTPUZ3lS3JgD7MrJ4efDnvHVoIJVq1CX63ok2j4gxptQ7aiJx3X5NCTiQlcvfP/qSNw8NIKZGfSrdNxFqtwh3WMYYUyQb/bcUyMjK5dXh43l5/0tEVa9HpfsnQ61mRR9ojDGlwLFMtWuCKC9fee3LH3l613NUqlyNKvf+aEnEGFOm2BNJmA36dgYPbnuaGjERVL73B2tYN8aUOZZIwmji70u5atmD1I86SOW7J0LD9uEOyRhjis0SSZgsWLeVlpP60iQyhag7v4MmncIdkjHGHBNrIwmDTbtSyBl9B+0jtpF3/QiiWp0d7pCMMeaYWSIpYbm5eWz89D66s4y0i/9NtVNsAEZjTNkWtkQiIltEZIWILBWROFdWV0SmisgG97OOKxcReV9E4kVkuYic4XOevm7/DSLSN1z3469ZHz/DpVnT2HDSQ9Q91wZQNsaUfeF+IrlQVTupaqxb7w9MV9W2wHS3DtATaOs+/fCGskdE6gIDgK5486UM8GM4l7CZNe49Lt41nGX1rqDtTf8MdzjGGBMU4U4kh+vNH8OyjACu8Sn/XD3zgNoi0hjoAUxV1WRVTQGmApeXdND+WDJ3Ct1W/Z01lU/nlL98BiLhDskYY4IinIlEgZ9FZJGI9HNljVR1p1veBTRyy02BBJ9jE13Zkcr/RET6iUiciMTt2bMnmPfgl12Jm2j2cz/2RdSn1V++IjI6psRjMMaYUAln999zVHW7iDQEporIWt+NqqoiosG4kKoOBYYCxMbGBuWc/srPyiTjs5tozCHSbvmWKrUblOTljTEm5ML2RKKq293PJOBbvDaO3a7KCvczye2+HWjuc3gzV3ak8tJBlc2f3ccJuRtY2vlfNGl3RtHHGGNMGROWRCIi1USkRsEycBmwEpgAFPS86gt875YnAHe63lvdgDRXBTYFuExECuZMucyVlQp7Zw2lzc6f+LrmHZx9xZ3hDscYY0IiXFVbjYBvxWtwjgJGqepkEVkIjBORe4GtwE1u/4lAL7w54zOBuwFUNVlEXgEWuv3+rqrJJXcbR5a/Yzk1Z/6NuXoaZ939BmKN68aYciosiURVNwEdCynfB1xcSLkCDx/hXMOB4cGOMSBZ6aR9cRvZWo2dF79H9zrVwh2RMcaETGnr/lsu7P/6MWpmJjC6+QCuP/f0cIdjjDEhZYkkyHTlN9Rc/w1DI27kjj63WZWWMabcs9F/gyl9N9nfP8ma/OOp2+sF6lWvFO6IjDEm5OyJJFhUOfTto2jOAYbVe54bu9gEVcaYisESSbAsH0vlTVN4J78PT996JRERVqVljKkYrGorGDKTyZ74Aivy21Lnosc5vkH1cEdkjDElxp5IgiB36stEZqUypPrD3Htum3CHY4wxJcoSSaAS44hcMoLPcntw13VXER1p/0mNMRWL/dULhCrZPz5LktZmdbuH6X5C/XBHZIwxJc4SSSDW/EDMrsW8m3cjT10dW/T+xhhTDllj+7HKyyX754Fs1aZU6Xw7TWtXCXdExhgTFvZEcqyWjSYmdSP/4RYevKh9uKMxxpiwsSeSY5GfT9ast9mQ34pW3W+kQQ17g90YU3HZE8mxWPcTldI2MZze3HPu8eGOxhhjwsqeSI5B9ux32aUNqdbpWmpXtfnXjTEVmz2RFNfu1cTsjGNE3mXcf/6J4Y7GGGPCzp5Iiilv0efkE8We1tfSol7VcIdjjDFhZ4mkOPJyyV06hml5Z3DdOaeFOxpjjCkVrGqrOBLmUyk7hd+rnM95bRuEOxpjjCkV7ImkGDJX/kiURlL31MttmHhjjHEskRRD5rpfWJ9/Ild1sUZ2Y4wpUC6qtkTkchFZJyLxItI/JBfJOUjt9A1sq3oybRvVCMkljDGmLCrziUREIoEPgJ5AB+AWEekQ7Osc3LmGKPKIaX56sE9tjDFlWplPJEAXIF5VN6lqNjAG6B3si2zZtAGAFq3bBfvUxhhTppWHRNIUSPBZT3Rl/yUi/UQkTkTi9uzZc0wX0WoNmFu9Bye0DfrDjjHGlGkVorFdVYcCQwFiY2P1WM7RofNF0PmioMZljDHlQXl4ItkONPdZb+bKjDHGlIDykEgWAm1FpLWIxAB9gAlhjskYYyqMMl+1paq5IvIIMAWIBIar6qowh2WMMRVGmU8kAKo6EZgY7jiMMaYiKg9VW8YYY8LIEokxxpiAWCIxxhgTEEskxhhjAiKqx/R+XpklInuArcd4eH1gbxDDCafyci92H6WL3UfpE6x7aamqhU7EVOESSSBEJE5VY8MdRzCUl3ux+yhd7D5Kn5K4F6vaMsYYExBLJMYYYwJiiaR4hoY7gCAqL/di91G62H2UPiG/F2sjMcYYExB7IjHGGBMQSyTGGGMCYonETyJyuYisE5F4Eekf7ngKiMgWEVkhIktFJM6V1RWRqSKywf2s48pFRN5397BcRM7wOU9ft/8GEenrU36mO3+8O1aCFPdwEUkSkZU+ZSGP+0jXCPJ9DBSR7e47WSoivXy2veBiWiciPXzKC/39ctMjzHflY91UCYhIJbce77a3CvA+movIDBFZLSKrRORxV16mvpOj3EdZ/E4qi8gCEVnm7uXlY71+sO7xiFTVPkV88Ian3wgcD8QAy4AO4Y7LxbYFqH9Y2b+A/m65P/CGW+4FTAIE6AbMd+V1gU3uZx23XMdtW+D2FXdszyDFfR5wBrCyJOM+0jWCfB8DgWcK2beD+92pBLR2v1ORR/v9AsYBfdzyEOBBt/wQMMQt9wHGBngfjYEz3HINYL2Lt0x9J0e5j7L4nQhQ3S1HA/Pdf79iXT+Y93jEWIPxR6G8f4CzgCk+6y8AL4Q7LhfLFv43kawDGrvlxsA6t/wRcMvh+wG3AB/5lH/kyhoDa33K/7RfEGJvxZ//AIc87iNdI8j3MZDC/2j96fcGbw6ds470++X+kOwFog7/PSw41i1Huf0kiN/N98ClZfU7KeQ+yvR3AlQFFgNdi3v9YN7jkT5WteWfpkCCz3qiKysNFPhZRBaJSD9X1khVd7rlXUAjt3yk+zhaeWIh5aFSEnEf6RrB9oir8hnuU1VT3PuoB6Sqau5h5X86l9ue5vYPmKsSOR3vX8Bl9js57D6gDH4nIhIpIkuBJGAq3hNEca8fzHsslCWSsu8cVT0D6Ak8LCLn+W5U758UZa6Pd0nEHcJrDAbaAJ2AncBbIbhGSIhIdeBr4AlV3e+7rSx9J4XcR5n8TlQ1T1U7Ac2ALkD7MIdUKEsk/tkONPdZb+bKwk5Vt7ufScC3eL9su0WkMYD7meR2P9J9HK28WSHloVIScR/pGkGjqrvdH4B8YBjed3Is97EPqC0iUYeV/+lcbnstt/8xE5FovD++I1X1G1dc5r6Twu6jrH4nBVQ1FZiBV81U3OsH8x4LZYnEPwuBtq4nQwxeQ9aEMMeEiFQTkRoFy8BlwEq82Ap6y/TFqyfGld/petx0A9JclcIU4DIRqeMe+S/DqxPdCewXkW6uh82dPucKhZKI+0jXCJqCP4rOtXjfScG1+7jeNa2BtngN0IX+frl/nc8AbigkXt/7uAH4xe1/rDEL8AmwRlXf9tlUpr6TI91HGf1OGohIbbdcBa+tZ80xXD+Y91i4YDVqlfcPXi+V9Xh1lH8LdzwupuPxelosA1YVxIVXxzkd2ABMA+q6cgE+cPewAoj1Odc9QLz73O1THov3P91GYBDBazwcjVfFkINXB3tvScR9pGsE+T6+cHEud/8TN/bZ/28upnX49IA70u+X+44XuPv7Cqjkyiu79Xi3/fgA7+McvCql5cBS9+lV1r6To9xHWfxOTgOWuJhXAv93rNcP1j0e6WNDpBhjjAmIVW0ZY4wJiCUSY4wxAbFEYowxJiCWSIwxxgTEEokxxpiAWCIxZZaIqIi85bP+jIgMLOY5PhNvVNhKbr2+iGwJUnwXiMiPwThXEddp4EZqXSIi5x627QkRqerHOTJCF6Ep7yyRmLIsC7hOROoHeJ48vHcfShURifRz14uBFap6uqrOOWzbE3gD/hkTMpZITFmWizcf9ZMBnudd4EmfISGA/32iEJFBInKXW94iIq+JmwdGRM4QkSkislFE/uJzmpoi8pN4cz4MEZEId/xlIvK7iCwWka/c2FAF531DRBYDNx4WTysR+UW8gQeni0gLEemENwx7bxdLFZ/9HwOaADNEZIYru0W8OUFWisgbh/+HcE9kv4vIFW79WRFZ6K75sk8ca0RkmHjzZPxccF0ReUy8uUCWi8iYY/s6TFljicSUdR8At4lIrQDOsQ34FbijuMepN6DeHOAzvCElugEv++zTBXgUb06INvzxBPUicIl6A27GAU/5HLNPVc9Q1cP/EP8HGKGqpwEjgfdVdSnwf3hzT3RS1YMFO6vq+8AO4EJVvVBEmgBvABfhDV7YWUSuKdhfRBoBP+G9Qf2TiFyGN5xGF7f/mfLHoKBtgQ9U9WQgFbjelfcHTncx+iZUU45FFb2LMaWXqu4Xkc+Bx4CDRe1/FK/hjSf0UzGOKRhvbQXeBETpQLqIZBWMkQQsUNVNACIyGm8Ij0N4iWWuNzQUMcDvPucde4TrnQVc55a/wHsSKY7OwExV3ePiGYk3Mdd3eBMnTQceVtVZbv/L3GeJW6+Ol0C2AZtdEgNYhDcnC3jDeYwUke/ceU0FYInElAfv4k3682lhG0VkCt4cF3Gqel9h+6jqBvHmfbjJpziXPz+1Vz7ssCz3M99nuWC94P+tw8cgUrxxqqaq6i2F3g0cOEJ5KOXiJYQeQEEiEeA1Vf3Id0fx5vnwvd88oKBK7Qq85HQV8DcROVX/mNfClFNWtWXKPFVNxpsa9N4jbO/hqn0KTSI+/gE847O+FejgRk2tjdeoXVxd3OiqEcDNeFVo84DuInIC/HcU53Z+nOs3vBFaAW7Dq1IrSjrelLPgDcJ3vmsHicSbpbAgaSheh4P2IvK8K5sC3OPTftNURBoe6ULuHpur6gzgebxhzKv7EaMp4+yJxJQXbwGPBHICVV3lGrnPcOsJIjIOb+TVzfxRxVMcC/FGuj0Bb2jub1U13zXajy7odozXZrK+iHM9CnwqIs8Ce4C7/bj+UGCyiOxw7ST9XRwC/KSq/x0eXFXzROQWYIKIpKvqhyJyEvC7q4LLAG7HewIpTCTwpWuvErw2nFQ/YjRlnI3+a4wxJiBWtWWMMSYglkiMMcYExBKJMcaYgFgiMcYYExBLJMYYYwJiicQYY0xALJEYY4wJyP8D0Ts/I8W7IIQAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plt.plot(N, V)\n",
"\n",
"k = 10.34\n",
"beta = 0.64\n",
"plt.plot(N, k * (N**beta))\n",
"plt.xlabel('N - Number of tokens')\n",
"plt.ylabel('|V| - Size of vocabulary')\n",
"plt.title('Heaps\\' Law')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment