Skip to content

Instantly share code, notes, and snippets.

@sirex
Created September 26, 2016 21:44
Show Gist options
  • Save sirex/2fc15b3f60d146c9b9814e0aa996e742 to your computer and use it in GitHub Desktop.
Save sirex/2fc15b3f60d146c9b9814e0aa996e742 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 181,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import xmltodict\n",
"import bz2\n",
"import collections\n",
"import re\n",
"import tqdm\n",
"import unicodedata as ud\n",
"import string\n",
"import locale\n",
"import funcy\n",
"import pandas as pd\n",
"import matplotlib as mpl\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"mpl.rc('font', family='Ubuntu', size=16)\n",
"mpl.rc('figure', figsize=(16, 10))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Parsisiunčiame naujausią Lietuviškos Vikipedijos straipsnių duomenų bazę."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--2016-09-26 20:43:08-- https://dumps.wikimedia.org/ltwiki/20160920/ltwiki-20160920-pages-articles.xml.bz2\n",
"Ieškoma dumps.wikimedia.org (dumps.wikimedia.org)... 208.80.154.11, 2620:0:861:1:208:80:154:11\n",
"Jungiamasi prie dumps.wikimedia.org (dumps.wikimedia.org)|208.80.154.11|:443... prisijungta.\n",
"HTTP užklausa išsiųsta, laukiama atsakymo... 200 OK\n",
"Dydis: 145996340 (139M) [application/octet-stream]\n",
"Saving to: ‘ltwiki-20160920-pages-articles.xml.bz2’\n",
"\n",
"ltwiki-20160920-pag 100%[===================>] 139,23M 1,91MB/s in 82s \n",
"\n",
"2016-09-26 20:44:31 (1,69 MB/s) - ‘ltwiki-20160920-pages-articles.xml.bz2’ saved [145996340/145996340]\n",
"\n"
]
}
],
"source": [
"!wget https://dumps.wikimedia.org/ltwiki/20160920/ltwiki-20160920-pages-articles.xml.bz2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Duomenys paimti iš https://dumps.wikimedia.org/backup-index.html"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Funkcija, kuri pašalina kirčio ženklus."
]
},
{
"cell_type": "code",
"execution_count": 141,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"lt_letters = 'ąčęėįšųūž'"
]
},
{
"cell_type": "code",
"execution_count": 140,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'ąčęėįšųūž ąžuolynas ranka ranka rėtis'"
]
},
"execution_count": 140,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def strip_accents(s):\n",
" s = ud.normalize('NFC', s)\n",
" s = [c if c in lt_letters else ud.normalize('NFD', c) for c in s]\n",
" s = [c for c in ''.join(s) if not ud.combining(c)]\n",
" return ud.normalize('NFC', ''.join(s))\n",
"\n",
"strip_accents('ąčęėįšųūž ąžuolýnas rankà rañkā́ rėtis')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Išrenkame visus žodžius ir suskaičiuojame, kiek kartų kiekvienas žodis pasikartojo. Rezultatą gauname kintamajame `wcounter`."
]
},
{
"cell_type": "code",
"execution_count": 144,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 321606/321606 [05:59<00:00, 895.45it/s]\n"
]
}
],
"source": [
"clean_re = re.compile(r'\\d')\n",
"word_re = re.compile(r'\\W')\n",
"\n",
"wcounter = collections.Counter()\n",
"\n",
"def extract_words(path, page):\n",
" global words, pages, progress\n",
" tag, attrs = path[-1]\n",
" if tag == 'page' and page['revision']['format'] == 'text/x-wiki':\n",
" text = page['revision']['text'].get('#text', '')\n",
" text = clean_re.sub('', text.lower())\n",
" text = strip_accents(text)\n",
" words.update(filter(None, word_re.split(text)))\n",
" progress.update(1)\n",
" return True\n",
"\n",
"\n",
"stream = bz2.open('ltwiki-20160920-pages-articles.xml.bz2')\n",
"with tqdm.tqdm(total=321606) as progress:\n",
" xmltodict.parse(stream, item_depth=2, item_callback=extract_words)"
]
},
{
"cell_type": "code",
"execution_count": 151,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"words = pd.DataFrame(wcounter.most_common(), columns=['word', 'freq'])"
]
},
{
"cell_type": "code",
"execution_count": 174,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Unikalių žodžių: 1,560,052\n",
"Viso žodžių: 51,808,378\n"
]
}
],
"source": [
"print('Unikalių žodžių: {:>12,}'.format(words.word.size))\n",
"print('Viso žodžių: {:>12,}'.format(words.freq.sum()))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Suskaičiuojame, kiek kartų kiekviena raidė panaudota visuose unikaliuose žodžiuose (kiekvienas žodis skaičiuojamas vieną kartą, neatsižvelgiant į jo dažnumą)."
]
},
{
"cell_type": "code",
"execution_count": 160,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"lcounter = collections.Counter(funcy.flatten(list(w) for w in wcounter.keys()))"
]
},
{
"cell_type": "code",
"execution_count": 162,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[('i', 1512312), ('a', 1500132), ('s', 991123)]"
]
},
"execution_count": 162,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lcounter.most_common(3)"
]
},
{
"cell_type": "code",
"execution_count": 164,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"letters = pd.DataFrame(list(lcounter.items()), columns=['letter', 'freq'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Nustatome lokalę pagal aplinkos nustatymus (turėtų būti `lt_LT.UTF-8`), nes ją naudosime lietuviškų raidžių rūšiavimui."
]
},
{
"cell_type": "code",
"execution_count": 167,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'lt_LT.UTF-8'"
]
},
"execution_count": 167,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"locale.setlocale(locale.LC_ALL, '')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Atrenkame tik lietuviškas raides."
]
},
{
"cell_type": "code",
"execution_count": 168,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"all_lt_letters = sorted(string.ascii_lowercase + lt_letters, key=locale.strxfrm)\n",
"letters['letter'] = pd.Categorical(letters['letter'], all_lt_letters)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Išvalome visas kitas raides."
]
},
{
"cell_type": "code",
"execution_count": 175,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"letters = letters.dropna(subset=['letter'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Piešiame raidžių dažnumo grafiką."
]
},
{
"cell_type": "code",
"execution_count": 176,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.text.Text at 0x7f14a5db6c18>"
]
},
"execution_count": 176,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA+MAAAJuCAYAAADb3dNiAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzs3X2UrVddJ/jvj7x4QUhzY14KEO6VZIIuEidBAYFlKJzQ\nEG+naUFEB5tGA2M7agvDCCPMyL1tLyVtr3EQXQLNm7rAYPdKCyYiGKRCECQyTQIicAFJRJhcQueF\n3I6aBPb8cU6SyknVTZ26Vfs8dc7ns9ZZp+p59nP2fs6zq+791n7O3tVaCwAAANDPA2bdAAAAAFg0\nwjgAAAB0JowDAABAZ8I4AAAAdCaMAwAAQGfCOAAAAHQmjAMAAEBnMw/jNfK2qrphnf3HVdVPV9VK\nVX2tql4wsX9fVX2iqm6rqo9V1blrvMagygAAALDYZhrGq2pPksuSvCBJW2P/UpKPJnldksNJfivJ\nx1ftf3ySS5LckOTlSW5PcllVnT7UMgAAAFCt3ScD96m46llJLk7ypSRfTXJGa+2UVfsryZVJzkxy\nQWvtyjVe49Ikjx0fe0dVPTjJF5O8u7V24RDLAAAAwCxHxj+d5BUZhe3PrbH/2UmenORl6wTx45Kc\nl1HQvSNJWmuHk1yeZN8QywAAAEAywzDeWjvYWvuN1trt6xT5sSS3JPmdqnpIVR0/sf+0JMcnuWZi\n+9VJTq6qEwdYBgAAAGY/gdsRfE+SOzIaNb8lyeGquqSqThrv3z1+vmniuJvHzycOsAwAAADk2Fk3\n4AhOSfL5JK9P8uUk353kVUl+L8n5M2wXAAAAHJUhh/FvJLm8tfbb4+/fXVW7k7ykqh6Ue0agd08c\n99Dx84255/yGUuZeqmo2s+cBAADQRWut1to+5NvUv5Tk1Iltd030dlKSL2S0dNg5E2XOTvK11tqN\nAyxzH621qR6vfvWrpz5mM48e9TiXYdbjXIZZj3MZZj3OZZj1OJdh1uNchlmPcxlmPc5lmPVspo4j\nGXIYX0ny9Kr61lXbfiDJrUm+1EYzll+e5IK7JnerqhMymtH80iQZWhkAAABIhh3G/+8kJyT5QFX9\nXFX9XpLnJLmo3fMnhn+b5GFJ3ltVP5fkfUkemOSiVa8ztDIAAAAsuCGF8XuN4bfWvpDkmePtFyV5\nWpJXtNZ+dVWZqzJaj/ykcZljk5zfWjs41DJHa3l5eateaub1OJdh1uNchlmPcxlmPc5lmPU4l2HW\n41yGWY9zGWY9zmWY9Wx1HXV/97Gzfaqqef8BAADmU1Wl7cAJ3AAAAGAuCeMAAADQmTAOAAAAnQnj\nAAAA0Nmxs24AAAAAI3v37s11110362awAXv27Mm111676ePNpj5DZlMHAABWG8++PetmsAEbuVZm\nUwcAAIABEcYBAACgM2EcAAAAOhPGAQAAoDNhHAAAADoTxgEAAKAzYRwAAIBBuO666/K85z0vp556\nar7lW74lD3/4w/O6171u1s3aFsfOugEAAABszNLS3hw6dN2sm3Efp566J9dff+1Rvcadd96Z888/\nP5/97Gdz3nnn5cwzz8zhw4dz+umnb00jB0YYBwAA2CFGQbzNuhn3cehQHfVrvOc978lnPvOZvPjF\nL84b3vCGLWjVsLlNHQAAgJk7ePBgqipPetKTZt2ULoRxAAAAZu7w4cNpreUnf/In84AHPCAPeMAD\ncswxx+Sqq67KK1/5ypx77rnZu3dvHvjAB+bEE0/MS1/60ruP/cd//MccOHAgj3nMY7Jr16484hGP\nyE//9E/npptuuk89N998c37xF38x3/Vd35Vdu3Zl165deeQjH5mnP/3pufjii7udr9vUAQAAGIx9\n+/blrLPOSpJUVR7+8IfnNa95TXbt2pUf+qEfyiMe8YjccsstecxjHpMkuf322/P0pz89H/rQh/LE\nJz4xz3jGM3L11VfnDW94Qz760Y/mqquuyrHH
jqLv9ddfn6c85Sm59tprc/bZZ+dFL3pRdu3alWuu\nuSbvf//788hHPjI/+qM/2uU8hXEAAAAGoary3Oc+Ny94wQvus29paSlvf/vb77P9ta99bT70oQ/l\nZS97WX7t137t7u0vfvGL85a3vCUXX3xxfvzHfzxJ8rM/+7O59tpr88pXvjK//Mu/fHfZK664Iu9/\n//u34YzW5zZ1AAAAdqx3vOMdOe6447J///57bf+pn/qptNbygQ98IEny9a9/Pe9617vyyEc+MgcO\nHJhBS+/NyDgAAAA71sGDB3PnnXfmIQ95yH32VVW+8pWv3F3uG9/4Rs4+++w84AGzH5cWxgEAANix\nWmvZtWtXXvKSl6S1+y77dtc65d/85jeTJMcdd1zX9q1HGAcAAGDHOv300/OpT30qP//zP59TTjll\n3XKnnXZakuRTn/pUr6Yd0ezH5gEAAGCTnvvc56a1lpe97GW588471y33bd/2bVleXs5nP/vZvPWt\nb73Xvq9//evb3cz7MDIOAADAIKx1m/n9+YVf+IVcdtllecc73pGPfOQjOffcc3PSSSflhhtuyHXX\nXZc/+7M/u7vsa1/72px77rm58MIL84d/+Id59KMfnYMHD+b9739/qmorT+V+GRkHYFssLe1NVU31\nWFraO+tmAwAzdKRAvN6+Xbt25Yorrsiv/Mqv5CEPeUj+4A/+IK9//evzkY98JI961KPuVfass87K\nX/zFX+Q5z3lO/vzP/zxvfvObc/311989ut5T9a6Qe1RV8/4D82r0D+a0v+Oq+z+EADAkVUf+t3Bp\naW8OHbquY4s25tRT9+T666+ddTM27YorrsjTnva0vPCFL8xb3vKWDR1zf9dqVZk1/4rgNnUAAIAd\nYicHXu7NbeoAAAAsvLs+NtetPrcDzs6RblPfzO0nO/3WEGC+uE0dAKa3kVufGYajvU1dGJ+hI4Vx\n/4kFdjq/xwBgesL4znG0Ydxt6gAAANCZMA4AAACdCeMAAADQmTAOAAAAnQnjAAAA0JkwDgAAAJ0d\nO+sGAAAAMLJnz57x8qAM3Z49e47qeOuMz5B1xoF55vcYALDorDMOAAAAAyKMAwAAQGfCOAAAAHQm\njAMAAEBnwjgAAAB0JowDAABAZ8I4AAAAdCaMAwAAQGfCOAAAAHQmjAMAAEBnwjgAAAB0JowDAABA\nZ8I4AAAAdCaMAwAAQGfCOAAAAHQmjAMAAEBnwjgAAAB0JowDAABAZzMP4zXytqq64QhlHlhVn6yq\nb6yxb19VfaKqbquqj1XVuUMvAwAAwGKbaRivqj1JLkvygiTtCEXfmOSxaxz/+CSXJLkhycuT3J7k\nsqo6fahlAAAAoFo7UgbexoqrnpXk4iRfSvLVJGe01k5Zo9yPJHlTRqH9R1prx6zad2lGIf2M1tod\nVfXgJF9M8u7W2oVDLDNxbm2997+qcuS/T6x5VGZ1PQEm+T0GACy6qkprrdbaN8uR8U8neUWSM5N8\nbq0CVbU7yWuTvDTJZyb2HZfkvIyC7h1J0lo7nOTyJPuGWAYAAACSGYbx1trB1tpvtNZuP0Kxf5fk\nYGvtzWvsOy3J8Umumdh+dZKTq+rEAZYBAACAHDvrBqynqs5I8qIk37dOkd3j55smtt88fj5xgGVu\nDAAAAAtv5rOpH8GBJO9qrX181g0BAACArTTIkfHxLOs/nOSHq+rU8eaHjPedmuSW3DMCvXvi8IeO\nn2/MPec3lDL3sX///ru/Xl5ezvLy8lrFAAAAGLiVlZWsrKxsqOwgw3iSp2Y0an9JksmZ576S5CeS\n/H5GS4edM7H/7CRfa63dWFW3DqnMWie6OowDAACwc00OsB44cGDdskO9Tf1Pk5w/fjxz/Hh7Rmvk\nPDPJn45nLL88yQVVdXySVNUJGc1ofmmSDK0MAAAAJDNcZ/xejah6a5J9a60zvqrMq5P80sQ6409I\ncmWSD2c0
iv78jJZKe1xr7eAQy0yck3XGgbnl9xgAsOiGus74pI387+teZVprVyV5dpKTklyU0W33\n568OvkMrAwAAAIMYGV9URsaBeeb3GACw6HbKyDgAAAAsBGEcAAAAOhPGAQAAoDNhHAAAADoTxgEA\nAKAzYRwAAAA6E8YBAACgM2EcAAAAOhPGAQAAoDNhHAAAADoTxgEAAKAzYRwAAAA6E8YBAACgM2Ec\nAAAAOhPGAQAAoDNhHAAAADoTxgEAAKAzYRwAAAA6E8YBAACgM2EcAAAAOhPGAQAAoDNhHAAAADoT\nxgEAAKAzYRwAAAA6E8YBAACgM2EcAAAAOhPGAQAAoDNhHAAAADoTxgEAAKAzYRwAAAA6E8YBAACg\nM2EcAAAAOhPGAQAAoDNhHAAAADoTxgEAAKAzYRwAAAA6E8YBAACgM2EcAAAAOhPGAQAAoDNhHAAA\nADoTxgEAAKAzYRwAAAA6E8YBAACgM2EcAAAAOhPGAQAAoDNhHAAAADoTxgEAAKAzYRwAAAA6E8YB\nAACgM2EcAAAAOhPGAQAAoDNhHAAAADoTxgEAAKAzYRwAAAA6E8YBAACgM2EcAAAAOhPGAQAAoLOZ\nh/EaeVtV3TCx/YyqemdV3VBV/62qLq+q713j+H1V9Ymquq2qPlZV5w69DAAAAIttpmG8qvYkuSzJ\nC5K0id3vSXJWkl9PclGSxyR5b1Wdsur4xye5JMkNSV6e5PYkl1XV6UMtAwAAANXaZAbuVHHVs5Jc\nnORLSb6a5IzW2uqgfU6ST7bW7hx///1Jrkjywtba7463XZrkseNj76iqByf5YpJ3t9YuHGKZifeg\nrff+V1Xu+/eJ+31XM6vrCTDJ7zEAYNFVVVprtda+WY6MfzrJK5KcmeRzkztbax+/K4iPfX78/NAk\nqarjkpyXUdC9Y3zM4SSXJ9k3xDIAAACQzDCMt9YOttZ+o7V2+wYP+cGMhliuGH9/WpLjk1wzUe7q\nJCdX1YkDLAMAAACzn8BtI6rq0Ulek+R3Wmt3hd3d4+ebJorfPH4+cYBlAAAAYPhhvKoeluR9Sf42\nyc/MuDkAAABw1I6ddQOOpKpOSPLejG5PP7+19verdt81Ar174rCHjp9vzD3nN5Qy97F///67v15e\nXs7y8vJaxQAAABi4lZWVrKysbKjsoMN4krcnOSXJk1prX53Y94WMlg47Z2L72Um+1lq7sapuHVKZ\ntU5wdRgHAABg55ocYD1w4MC6ZQd7m/p46bMfTPL81toXJ/ePZyy/PMkFVXX8+JgTMprR/NIhlgEA\nAIBkhuuM36sRVW9Nsm9infE/yWhU+VUTxW9trf3BuMwTklyZ5MNJLkny/IyWSntca+3gEMtMnLd1\nxoG55fcYALDohrrO+KTJ/32dluTkJG+ceLzm7gNauyrJs5OclOSijG67P3918B1aGQAAABjEyPii\nMjIOzDO/xwCARbdTRsYBAABgIQjjAAAA0JkwDgAAAJ0J4wAAANCZMA4AAACdCeMAAADQmTAOAAAA\nnQnjAAAA0JkwDgAAAJ0J4wAAANCZMA4AAACdCeMAAADQmTAOAAAAnQnjAAAA0JkwDgAAAJ0J4wAA\nANCZMA4AAACdCeMAAADQmTAOAAAAnQnjAAAA0JkwDgAAAJ0J4wAAANCZMA4AAACdCeMAAADQmTAO\nAAAAnQnjAAAA0JkwDgAAAJ0J4wAAANCZMA4AAACdCeMAAADQmTAOAAAAnQnjAAAA0JkwDgAAAJ0J\n4wAAANCZMA4AAACdCeMAAADQmTAOAAAAnQnjAAAA0JkwDgAAAJ0J4wAAANCZMA4AAACdCeMAAADQ\nmTAOAAAAnQnjAAAA0JkwDgAAAJ0J4wAAANCZMA4AAACdCeMAAADQmTAOAAAAnQnjAAAA0JkwDgAA\nAJ0J4wAAANCZMA4AAACdCeMAAADQmTAOAAAAnQnjAAAA0JkwDgAAAJ3NPI
zXyNuq6oY19u2rqk9U\n1W1V9bGqOnceygAAALDYZhrGq2pPksuSvCBJm9j3+CSXJLkhycuT3J7ksqo6fSeXAQAAgGqt3X+p\n7ai46llJLk7ypSRfTXJGa+2UVfsvTfLY8fY7qurBSb6Y5N2ttQt3apmJ96Ct9/5XVSb+PrGRdzWz\nup4Ak/weAwAWXVWltVZr7ZvlyPink7wiyZlJPrd6R1Udl+S8jELsHUnSWjuc5PIk+3ZqGQAAAEhm\nGMZbawdba7/RWrt9jd2nJTk+yTUT269OcnJVnbhDywAAAMDsJ3Bbx+7x800T228eP5+4Q8sAAADA\nYMM4AAAAzK1jZ92Addw1urx7YvtDx8835p6276Qy97F///67v15eXs7y8vJaxQAAABi4lZWVrKys\nbKjsUMP4FzJaFuycie1nJ/laa+3Gqrp1p5VZ60RXh3EAAAB2rskB1gMHDqxbdpC3qY9nI788yQVV\ndXySVNUJGc1WfulOLQMAAADJQMP42L9N8rAk762qn0vyviQPTHLRDi8DAMAOsLS0N1U11WNpae+s\nmw3sEEMK4+1e37R2VZJnJzkpozB7bJLzW2sHd3IZAAB2hkOHrsvov6gbf4yOAbh/1Vq7/1Jsi6pq\n673/VZWJv09s5BXjegJD4fcYsNP5PQYcrapKa63W2jekkXEAAABYCMI4AAAAdCaMAwAAQGfCOAAA\nAHQmjAMAAEBnwjgsKGunAgDA7FjabIYsbcYs6WNsN30M2On8HgOOlqXNAAAAYECEcQAAAOhMGAcA\nAIDOhHEAAADoTBgHAACAzoRxAAAA6EwYBwAAgM6EcQAAAOhMGAcAAIDOhHEAAADoTBgHAACAzoRx\nAAAA6EwYBwAAgM6EcQAAAOhMGAcAAIDOhHEAAADoTBgHAACAzoRxAAAA6EwYBwAAgM6EcQAAAOhM\nGAcAAIDOhHEAAADoTBgHAACAzqYO41X1L6rq+1d9f2JV/VFV3VJVH6qqPVvbRAAAAJgvmxkZ/7Uk\n/3zi+6cleWeS70zyH7agXQAAADC3jt3EMY9K8sUkqapTkjw/yc+21t5UVX+X5H/dwvYBAADA3NnM\nyPgNGY2AJ8nLxt+/bfz9oSS7j75ZAAAAML82MzL+ziQvqapnJDk9yc+31u4c7/v2jMI5AAAAsI7N\nhPH/I8mtSR6X5I1JfmvVvmOSvGML2gUAAABzq1prs27Dwqqqtt77X1VJpr02FdeTjdLH2G76GLDT\n+T0GHK2qSmut1tq3mZHxVNW3JnlyklNz38+dX9ta++BmXhcAAAAWwdRhvKqemuQ/Jzkxyeo/F971\n9V8m+b6taiAAAADMm83Mpv7vx88/mtGs6v+Q5CVJviPJgSR7t6RlAAAAMKc2E8b/xyS/3lr7TxnN\nnP7AJF9orV2X5MuxtBkAMCeWlvamqqZ6LC3tnXWzAdgBNvOZ8f+e5Lbx17ck+WaSR4y/f1iSw1vQ\nLgCAmTt06LpMO4HXoUNrztMDAPeymTD+xSRnJElr7ZtVdWWSV1XVKUl+JslHt7B9AAAAMHemXtqs\nqvYkubW1duP4+7OSvDfJUpK/SfKs1tqntrqh88jSZsySPsZ208eYB/rxYnP9gaN1pKXNtmSd8ao6\nJsnu1trXjvrFFogwzizpY2w3fYx5oB8vNtcfOFpbvs74xIs/MKNlzW6rqgclSWvttiMfBQAAAItr\n6tnUq+qhVfWbVfV3VXVHRhO23TrxAABgg8zaDrB4NjMy/oYkz03ygSQXZxTG3YsDALBJZm0HWDyb\nmcDtliTvbK39L9vTpMXhM+PMkj7GdtPHmAe9+rGfl2FyXYCjdaTPjE99m/rY1UfRHgAAAFhom7lN\n/cokT6mqNyV5RJJdGU3gdrfW2l9vQdsAAABgLm3mNvVnJPnjJHckOW5yd5LWWjtma5o339ymzizp\nY2w3fYx54Db1xea6AEdrq5c2+/GMfi
v9fpL/NyZwAwAAgKlsZmT8cJLXt9b+9+1p0uIwMs4s6WNs\nN32MeWBkfLG5LsDR2uoJ3I5L8umjaxIAAAAsrs3cpv7VJE+uqv9vvQKttT/efJMAAABgvm1mZPyv\nk/xEkj9Kcumqxx+tet4yVfUvq+qzVfWPVfX5qvr5if1PrKqPVtXfV9VfV9Wz13iNQZUBAABgsW3m\nM+OnZ7Sk2bpaa1ccTaNW1fXCJG9JcklGS6r90yTnJ7mwtfbWqnpUkk8k+Zskb03yzCTPSPK01tqV\n49cYVJmJ8/OZcWZGH2O76WPMA58ZX2yuC3C0jvSZ8anDeE9V9ZdJbm+tPWX8fSX5qyQ3t9aeUlW/\nmeQFSfa21m6sqmOSXJPkUGvtfxofM6gyE+cnjDMz+hjbTR9jHgjji811AY7Wli5tVlXn3k+RL7fW\nvjDt667jpIxGmpOMFjCvqi8neeh4074kK621G8f7v1FVf5TkZVX14Nba4QGWAQAAYMFt5jPjK0k+\ncITH67aqcUnem+SZVbWcJFX12CTnJvndqnpgkj1Jrp445uokxyQ5Y2hlNnjOAAAAzLnNzKZ+wRH2\n/YuMPie9VX4hoxD7p1V1eZLHJ/m91tpvVtXDxmVumjjm5vHziblnBH0oZQAAAGD6MN5au2y9fVW1\nlORfHlWL7u3kJN+R5D9ntKTa45OcX1Xfk+QrW1gPAAAAdLOZkfEj2ZPkzi18vd9N8rettR9Lkqo6\nkOTDSd6Z5Kxxmd0Tx9w1Qn1j7hmVHkqZ+9i/f//dXy8vL2d5eXmtYgAAAAzcyspKVlZWNlR2MxO4\nPWhyU0a3YD81yc8l+YtpX3Odeh6a5MlJXn7XtvEM5W9M8msZjZpfl+SciUPPSfLNJAdba39fVYMp\ns9Z5rg7jAAAA7FyTA6wHDhxYt+xmJnA7nOTWVY+vJ7k2o1Hsw0lesonXXMvfJ/lGku+a2P7ojMLt\nDUkuS/LUqjopScZLiV2Q5IOrZi4fWhkAAAAW3NTrjFfV/tx7wcWWUSD/myTvaa3dsWWNq/qPSX4i\nyTuS/GWS78noM+l/2Fp7TlXtyWjps79J8tYk5yc5L8kPtNauHL/GoMpMnJ91xpkZfYztpo8xD6wz\nvthcF+BoHWmd8anDeE9V9S1JfjnJ85KckuRvk/xxkle11m4bl3liktcm+e6Mblt/VWvtkonXGVSZ\nVWWFcWZGH2O76WPMA2F8sbkuwNHaljA+/kz3o5L8k4w+N3631toHN/WiC0YYZ5b0MbabPsY8EMYX\nm+sCHK0jhfHNTOB2UpI3JPnnue9nzu/6jXXMtK8LAAAAi2IzS5u9LqMg/h+TfCTJTZn+T4YAAACw\nsDYTxp+R5Ldba/9mqxsDAAAAi2AzS5s9JMknt7ohAAAAsCg2MzJ+c5LdVfWg9QrcNdM5AAAAcF+b\nWWf8Q0medKQyrTUTuG2A2dSZJX2M7aaPMQ/Mpr7YXBfgaG3pbOpJ/n2Sc46uSQAAALC4Nr3OOEfP\nyDizpI+x3fQx5oGR8cXmugBH60gj41NP4FZVv1RV562z7/ur6i3TviYAAAAskiOG8ao6pqp+bGLz\n/iR/UlW/tMYhZyT5V1vUNgAAAJhL9zcyvivJv6mqt1XV6knZPprkl6rq8qpa2r7mAQAAwPw5Yhhv\nrf33JP8sybOT/OaqXW9O8vQk35nkr6rqedvWQgAAAJgzG/nM+H9I8rkk97otvbX2gSRnJXl/kt+v\nqkuTPHrLWwgAAABz5v4+M35CkpuSPLW1dsPk/tbaTa215yV5TkbLnf3itrQSAAAA5sj93ab+9dba\n/9ZaO7xq84Ek/3Wi3H/JaPK2X03ywS1vJQAArLK0tDdVteHH0tLeWTcZ4F6sMz5D1hlnlvQxtps+\nxjywzvhwTf+euS5Af0daZ/zYTb7go5I8I8mpue/o+qdba/9pM68LAAAAi2DqMF5Vz0ny9iTHjze1\nJK
uT/n9NIowDAADAOjYym/qk/Um+nOSJSR6U5B+S/Fhr7QFJfjbJni1rHQAAAMyhzYTx/yHJb7fW\n/jLJA8ePW8f7bk9ywha1DQAAAObSZsL413PPTBY3J7kjo4CeJHuT3HL0zQIAAID5tZkJ3D6X0TJm\naa21qnpvkv+rqs5M8rwkf7KF7QMAAIC5M/XSZlX1oNbabau+//Yk/yXJdyX5iyT/qrX25S1t5Zyy\ntBmzpI+x3fQx5oGlzYbL0mbATrDVS5t9a1VdkOTbx9//XZJ9rbWvbraBAAAAsEg2HMar6vgk/0+S\nC5McN7H7zqp6U5KXttb+cQvbBwAAAHNnmpHxP0zyzCQXZ7TO+OczWl/89CQ/nuRfJ/mOJOdvcRsB\nAABgrmwojFfVj2YUxH+ytfa2id2fSXJpVb0vyZuq6nmttXdubTMBAABgfmxoArequjTJCa21c++n\n3AeTfL219s+2qH1zzQRuzJI+xnbTx5gHJnAbLhO4ATvBkSZw2+g649+T5H0bKPfeJI/baMMAAABg\nEW00jH9bkkMbKPfVJCdtvjkAAAAw/zYaxo9N8o0NlGtJjtl8cwAAAGD+bfQz47dkNHP6P9xP0V1J\nvtFa270FbZt7PjPOLOljbDd9jHngM+PD5TPjwE5wpM+Mb3Rps99I8u0bLPu3GywHAAAAC2lDI+Ns\nDyPjzJI+xnbTx5gHRsaHy8g4sBNsxWzqAAAAwBYRxgEAAKAzYRwAAAA6E8YBAACgM2EcAAAAOhPG\nAQAAoDNhHAAAADoTxgEAAKAzYRwAAObc0tLeVNVUj6WlvbNuNsy1aq3Nug0Lq6raeu9/VSWZ9tpU\nXE82Sh9ju+ljzINe/djPy/Smf88W+7rM07nATlJVaa3VWvuMjAMAAEBnwjgAAAB0JowDAABAZ8I4\nAAAAdCaMAwAAQGfCOAAAAHQmjAMAAEBnwjgAAAB0JowDAABAZ8I4AAAAdCaMAwAAQGfCOAAAAHQm\njAMAAEBngw/jVXVcVf10Va1U1deq6gUT+/dV1Seq6raq+lhVnbvGawyqDAAAAItt0GG8qpaSfDTJ\n65IcTvJbST6+av/jk1yS5IYkL09ye5LLqur0oZYBAACAaq3Nug1rqqpKcmWSM5Nc0Fq7co0ylyZ5\nbJIzWmt3VNWDk3wxybtbaxcOscxE+9t67//o9Ke9NpWhXk+GRx9ju+ljzINe/djPy/Smf88W+7rM\n07nATlJVaa3VWvuGPDL+7CRPTvKydYL4cUnOyyjo3pEkrbXDSS5Psm+IZQAAYJ4tLe1NVW34sbS0\nd9ZNhpm/NFC7AAAgAElEQVQZchj/sSS3JPmdqnpIVR0/sf+0JMcnuWZi+9VJTq6qEwdYBgAA5tah\nQ9dlNAK/sceoPCymIYfx70lyR5LPZRTKD1fVJVV10nj/7vHzTRPH3Tx+PnGAZQAAACDHzroBR3BK\nks8neX2SLyf57iSvSvJ7Sc6fYbsAAADgqAw5jH8jyeWttd8ef//uqtqd5CVV9aDcMwK9e+K4h46f\nb8w95zeUMvexf//+u79eXl7O8vLyWsUAAAAYuJWVlaysrGyo7JDD+JeSnDqx7XPj55OSfCGjpcPO\nmShzdpKvtdZurKpbh1RmrZNcHcYBAADYuSYHWA8cOLBu2SF/ZnwlydOr6ltXbfuBJLcm+dJ4xvLL\nk1xw1+RuVXVCRjOaX5okQysDAAAAybDXGT8tyV8l+WRGnxN/QpL/Ocn/2Vr71XGZJ2S0FvmHk1yS\n5PkZrUv+uNbawSGWmThH64wzM/oY200fYx5YZ3y4rDM+neH25WG+X7BVduQ64621LyR5ZkY/zRcl\neVqSV9wVxMdlrspoPfKTxmWOTXL+6uA7tDIAAAAw2JHxRWBknFnSx9hu+hjzYLijiZurZ54YGZ/O\ncPvyMN8v2Co7cmQcAAAA5pUwDgAAAJ0J4wAAANCZMA4AAACdCeMA
AADQmTAOAAAAnQnjAAAA0Jkw\nDgAAAJ0J4wAAANCZMA4AbKmlpb2pqqkeS0t7Z91sAOiqWmuzbsPCqqq23vtfVUmmvTYV15ON0sfY\nbvrY4pqna9/rXObpPetl+vdssa/LcPvyMN8v2CpVldZarbXPyDgAAAB0JowDAABAZ8I4AAAAdCaM\nAwAAQGfCOAAAAHQmjAMAAEBnwjgAAAB0JowDAABAZ8I4AAAAdCaMAwAAQGfCOAAAAHQmjAMAAEBn\nwjgAAAB0JowDAABAZ8I4AAAAdCaMAwAAQGfCOAAAAHQmjAMAAEBnwjgAAAB0JowDwAJZWtqbqprq\nsbS0d9bNBoC5U621WbdhYVVVW+/9r6ok016biuvJRuljbDd9bJh6XJd5uva9zmWe3rNepn/PFvu6\nDLcvD/P9gq1SVWmt1Vr7jIwDAABAZ8I4AAAAdCaMAwAAQGfCOAAAAHQmjAMAAEBnwjgAAAB0JowD\nAABAZ8I4AAAAdCaMAwAAQGfCOAAAAHQmjAMAAEBnwjgAAAB0JowDAABAZ8I4AAAAdCaMAwAAQGfC\nOAAAAHQmjAMAAEBnwjgAsCMtLe1NVU31WFraO+tmA0CSpFprs27Dwqqqtt77X1VJpr02FdeTjdLH\n2G762DD1uC69rr1zWeyfl+nfs8W+LsPty8N8v2CrVFVaa7XWPiPjAAAA0JkwDgAAAJ0J4wAAANCZ\nMA4AAACdCeMAAADQmTAOAAAAne2YMF5VD6yqT1bVNya276uqT1TVbVX1sao6d41jB1UGAACAxbZj\nwniSNyZ57OoNVfX4JJckuSHJy5PcnuSyqjp9qGUAAACgWmuzbsP9qqofSfKmJJcl+ZHW2jHj7Zdm\nFNDPaK3dUVUPTvLFJO9urV04xDIT59XWe/+rKsm016ayE64nw6CPsd30sWHqcV16XXvnstg/L9O/\nZ4t9XYbbl4f5fsFWqaq01mqtfYMfGa+q3Ulem+SlST6zavtxSc7LKOjekSSttcNJLk+yb4hlAAAA\nINkBYTzJv0tysLX25ontpyU5Psk1E9uvTnJyVZ04wDIAAACQY2fdgCOpqjOSvCjJ962xe/f4+aaJ\n7TePn08cYJkbAwAAwMIb+sj4gSTvaq19fNYNAQAAgK0y2JHxqtqT5IeT/HBVnTre/JDxvlNzzwj0\n7olDHzp+vjH3nN9QytzH/v377/56eXk5y8vLaxUDAABg4FZWVrKysrKhsoMN40memtHI/SVJJmef\n+0qSf53R0mHnTOw7O8nXWms3VtWtQyqz1kmuDuMAAADsXJMDrAcOHFi37JBvU//TJOePH88cP96e\n0VoJz8xombPLk1xQVccnSVWdkNGM5pcmyXhW88GUAQAAgGSHrDN+l6p6dZJfWrXO+BOSXJnkwxmN\noD8/yZlJHtdaOzjEMhPnY51xZkYfY7vpY8Nkbe7FPZd5Y53x6Qy3Lw/z/YKtsqPXGV/D3T+trbWr\nkjw7yUlJLsrotvvzVwffoZUBAACAHTUyPm+MjDNL+hjbTR8bJqPJi3su88bI+HSG25eH+X7BVpm3\nkXEAAADY0YRxAAAA6EwYBwAAgM6EcQAAAOhMGAcAAIDOhHEAAADoTBgHAACAzoRxAAAA6EwYBwAA\ngM6EcQAAAOhMGIeBWVram6qa6rG0tHfWzQYAAKZQrbVZt2FhVVVb7/2vqiTTXpuK67nz9br2+hjb\nTR8bph7XZZ5+j83Tucyb6d+zxb4uw+3Lw3y/YKtUVVprtdY+I+MAAADQmTAOAAAAnQnjAAAA0Jkw\nDgAAAJ0J4wAAANCZMA4AAACdCeMAAADQmTAOAAAAnQnjAAAA0JkwDgCwIJaW9qaqNvxYWto76yYD\nzK1jZ90AAAD6OHTouiRtivK1fY0BWHBGxgEAAKAzYRwAAAA6E8YBAACgM2EcAAAAOhPGAQAAoDNh\nHAAAADoTxgEAAKAzYRwAAAA6
E8YBAACgM2EcAAAAOhPGAQAAoDNhHAAAADoTxgEGZGlpb6pqqsfS\n0t5ZNxsAgCkdO+sGAHCPQ4euS9KmPKa2pzEAAGwbI+MAAADQmTAOAAAAnQnjAAAA0JkwDgAAAJ0J\n4wAAANCZMA4AAACdCeMAAAAzsLS0N1U11WNpae+sm80Wsc44AADADBw6dF2SNuUxtT2NoTsj48wF\nf1UEAAB2EiPjzAV/VQQAAHYSI+MAAADQmTAOAAAAnQnjAAAA0JkwDgAAAJ0J42w7M50DAADcm9nU\n2XZmOgcAALg3I+MAAADQmTAOAAAAnQnjAAAA0JkwDgAAAJ0NOoxX1RlV9c6quqGq/ltVXV5V3ztR\nZl9VfaKqbquqj1XVuWu8zqDKAAAAsNgGHcaTvCfJWUl+PclFSR6T5L1VdUqSVNXjk1yS5IYkL09y\ne5LLqur0u15gaGUAAACgWptuyamequqcJJ9srd05/v77k1yR5IWttd+tqkuTPDbJGa21O6rqwUm+\nmOTdrbULx8cMqszE+bX13v+qyrTLgSWVIV7PHufi/Zr+XObpPZsn83Rd5ulc5sk8/U52Lj3qGe7P\nZI9zmaffY/rYMM1TH2NtVZXW2prrNg96ZLy19vG7gvjY58fP/6SqjktyXkZB945x+cNJLk+yL0mG\nVgYAAACSgYfxNfxgRn86uiLJaUmOT3LNRJmrk5xcVScOsAwAAADsnDBeVY9O8pokv9Na+0SS3eNd\nN00UvXn8fOIAywAAAMDOCONV9bAk70vyt0l+ZsbNAdjRlpb2pqqmeiwt7Z11swEA5sqxs27A/amq\nE5K8N6Pb089vrf39eNddI9C7Jw556Pj5xtxzfkMpcx/79++/++vl5eUsLy+vVQxgyxw6dF2mnSzm\n0KE15x0BAGCVlZWVrKysbKjs4MN4krcnOSXJk1prX121/QsZLR12zkT5s5N8rbV2Y1XdOqQya53c\n6jAOAADAzjU5wHrgwIF1yw76NvWqelZGk7Y9v7X2xdX7xjOWX57kgqo6flz+hIxmNL90iGUAAAAg\nGf4643+S0cjyqyZ23dpa+4OqekKSK5N8OMklSZ6f5Mwkj2utHRy/xqDKTJyfdcbXP2qQ68D2MNx1\nQDdXD9OZp58XfWyY9LHFPZfN1TPcn0nrjE9HHxumeepjrG3HrjOe0XJhJyd548TjNUnSWrsqybOT\nnJTkooxuuz9/dfAdWhkAAAAY9Mj4vDMyfsSjBjly0cNw/3K9uXqYzjz9vOhj01la2jueXG/jTj11\nT66//tqpjtHHFvdcNlfPcH8mjYxPRx8bpnnqY6ztSCPjO2ECNwCYe2a5B4DFMvTb1AEAAGDuCOMA\nAADQmTAOAAAAnQnjAAAA0JkwDgAAAJ0J4wAAANCZMA4AAACdCeMAcD+WlvamqqZ6LC3tnXWzAYAB\nO3bWDQCAoTt06LokbcpjansaAwDMBSPjAAAA0JkwDgAAAJ0J4wAAANCZMA4AAACdCeMAAADQmTAO\nAAAAnQnjAAAA0JkwDgAAAJ0J48C2WVram6qa6rG0tHfWzQaArqb999K/lTAfqrU26zYsrKpq673/\nVZVk2mtTGeL17HEu3q/pz8V1GaZ5ui7zdP1dF+eynXUMu55h/kwmfc5lnq7LPJ3LPJmnfytZW1Wl\ntVZr7TMyDgAAAJ0J4wAAANCZMA4AAACdCeMAAADQmTAOsAFmhgcAmL15Wn3AbOozZDb1Ix41yNlu\nexjubKfT1+O6LPZ1cf0X97o4l2Gey+bqGebPZGIG8kU+l3kyT/9W9rLT+pjZ1AEAAGBAhHEAAADo\nTBgHAACAzoRxAAAA6EwYBwAAgM6EcQAAAOhMGAcAAIDOhHEAAADoTBgHAACAzoRxAAAA6EwYBwAA\ngM6EcQAAAOhMGAcAAICxpaW9qaqpHktLe6euRxiHKfT6wQQAAGbj0KHrkrSpHqNjpiOMwxR6/W
AC\nALA2gyPMi2Nn3QAAAICNumdwZJpjansaA0fByDgAAAB0JowDAADMsWlv7Xdbfx9uUwcAAJhj097a\n77b+PoyMAwAAQGfCOAAAAHQmjC8wy0IAAADMhs+MLzDLQgAAAMyGkXEAAADoTBgHAACAzoRxAAAA\n6EwYBwAAmDDtZMcmOmZawjgAO5ZVIQDYLvdMdryxx6g8bJzZ1AHYsawKAQDsVEbGAQAAoDNhfBtU\n1b6q+kRV3VZVH6uqc2fdJphnblUGAJgt/x+bnjC+xarq8UkuSXJDkpcnuT3JZVV1+kwbBnNs2s90\n+VwXAMDW8v+x6QnjW+/VSb6S5Jmttd9M8k+T/EOSX5xpqwAAOjA6BrAxwvgWqqrjkpyX5N2ttTuS\npLV2OMnlSfZtTS0rW/Myg6inRx296ulRR696etTRq54edfSqp0cdverpUUevenrU0aueHnX0qqdH\nHb3q6VHH1taz/ujYB9bZvtWjYytb+FqzrKNXPT3q6FVPjzp61dOjjl719KijVz1bW4cwvrVOS3J8\nkmsmtl+d5OSqOvHoq1g5+pcYTD096uhVT486etXTo45e9fSoo1c9PeroVU+POnrV06OOXvX0qKNX\nPT3q6FVPjzp61dOjjl719KijVz096uhVT486etXTo45e9fSoo1c9W1uHML61do+fb5rYfvP4eQvC\nOAAAADudMA4AAACdVWtt1m2YG1X1nUn+OsmLWmtvWbX9FUl+JcnJrbUbV2335gMAAMyx1lqttf3Y\n3g2Zc1/IaCmzcya2n53ka6uDeLL+RQEAAGC+uU19C41nUL88yQVVdXySVNUJGc2wfuks2wYAAMBw\nuE19i1XVE5JcmeTDSS5J8vwkZyZ5XGvt4CzbBgAAwDAYGd9irbWrkjw7yUlJLsroowDnC+L3qKpv\nmXUb5kFVPaCqfNSEo1ZVD5p1G5iNqjqpqs7uWN+uqjqtV30AMGTC+DZorV3WWjurtfag1tr3ttau\nnHWbhqKqLknywVm3Y6erqqclOZzkybNuy6KrqhdV1aeq6raq+tL47pgdo6releT/b+/Ow+yo6jSO\nf1/ZF1ECCaCgwgyP4C6IioICskkEZARRH1HEDQV8RlxAcEdwRhlEBcUFYUYWWVRWE5YQlEgikCEq\nAgmbAk6EBEJCQgKB/OaP32m9adPZ+lTdvp338zz36eT07fNW1b21nKpTp8a0mHeTpPNayLlQ0sSm\nc9rS4HK7GPhhA/UOZAxwfot5ZmZmQ5avqlnbNgQ8cN3grQu4h0GXSdqLbMhcA3wfeA5wX1cnasVt\nQLvr5LrA+i3krFdew0VTy833qpmtoiQdDTwTEd/u9rSYrarcGDczW3nvBB4FRkfE092emB7x8mhh\nsJKIGC1pOJ34a2W5mdkq5SjgZsCNcbMucTd1s0qG2YG/LZ8XAPe5Ib782mxQDqfG63CaFzNbfj62\nMBve3BjvEZL2lXSHpLmS5ki6VNIWvZZRbCrpKkmzJM2Q9D1JjXRblbSjpPGS5kl6VNKVTWUAcxvM\neLukW8t9yVOBj9XOKDn7SposaYGkeyRVz5E0WtItJePPkj5dO6PkND4vZO+i7SUtKq9nGshA0msl\nXVvWzfmS7pZ0QcWIbcu6v0DSg5JOauoAsHz21zVRd7+ciyVVv2VA0o/K9mT1jrI1yrr59dp5HRlt\nLbcDJD0j6fCms2qRdJykKZJmS1pY1o+DJH1J0rTyvZ4m6eCKebd25DW2zkjaWdIN5fs1XdK3JFW5\n/ULS58pym1GW0VRJx0hq5NhQ0g6Sxpbl9pSkv0oaI2mrihkfl3RjWVYLJP2hVt0dGTuVjLnlOKaR\nMTf6coDHm8wBDuzYh1VdXmUff2G/surb5uGUI+mM8t1avV/5A5LOqlD3/H77r8+WfdqzOsq+XNaf\nQd0OKemt5Xv1vo6yPUrZ/oOpu1/OCzu+w52ve3shw93Ue8
cc4FLgAeBfya5FawF791gGwPOAm4Cv\nAC8GPgJsSo5CX41yhODxwL3AieSAZyN7MOMtwM+BW4AvAiOA9y31j1YuZ1/gEuC3wOeA3YHTJc2M\niIsqZYwGLgMmlYydgG9ImhMR1QaRamNeOvwZOIGG7ruWtANwPfCXkjMXeDdwYMWYAM4BHgP2BI4t\nOSdVzOjMakM0lHUVcBjwemBCKXsNua28poG8Po0vN0nbAz8FTo2IM5rOq2gP8gkmxwKrAR8ALiDX\nzTOB+eQJzJ9K+nVE/K1C3siSJ2Cv8u/pwHcHWfffSXoDcC1wB7ntfz5wBPAK4C0VIvYERgFfA54G\n3gx8Hdga+FCF+v9OObDlr8lldDLwUMl7F7A5uQ+t4SBgK+AUchtW9UkRkjYArgD+ChwHrEN+96pq\nK6e4Behb3x+tXPeStltNbJuHU87lwIeB15KPSUb5BIrnk8fogzG+1P1KYHIpeyOwNrldmVLKXg/8\nLiKeHExYRIyRdD5wsqQryG3x94ELI2Kw89JpJotvs7YFPsU/9tFDOyMi/OrBF3A6ufNcs5cyyA3B\nTf3KjgGeAV5RefrHkicW1mlwGbWRMQG4q/NzAEaXZfamijm3kRviZ3WUTQF+UznjdmC1jrLrgdsq\nL7PG56XU+U/f5wY+/+vIA9hnd5R9iRx0p5F5AP4E/LGh+bkZuK7JZVZyLgLubaDeDYAFwEkdZZ8B\n5jW8PW5kufV9/mQDZjrw84Y/l+rrTP86gRcCi4CvdJTtXMo+1MQ8lHVmcuX5+jVwP7B2R9lHyrZ/\ndEPzcUqp/yWV5+V64G/AczvKmtiPNbpNBrYr36N3N5XRcs59ZMOotfqb2DYPpxzyxO7cftuvI4En\ngHUHWfeoss59svxfwCPAQuDIUrYaeXHuS5XmZ2PgYeBs8pHPM4CRDX7n1inb4z/S0LF57Qx3U+8x\nktYqZ0ynkCvRqF7M6OeckrNHrQolrU1eOTg/IubXqrcLGesCOwIXRcRTTWSUnE2AlwBXAiMlbSJp\nU/IK9nYNZGzckTER2EbSOg3kNDIvbSldxHYGzouIx1uMngQ0cYtKz4uIOeRJuM4udjsDv21yHW3Y\nCOBq4B7gPV2elkGLiL+QV/he1FF8a/n5vIZiJwFb1qqsbA/fQO5fFnT86iyykVZtf9nPaeT+eM9a\nFZZ95RuBcyPisVr1dsmd5FX9L0iqtoy6mGNDTOTV6GuBt3UU7wNcHxFPDLLuh8lj+11L0avIp45c\nSPZUgbwivx65nxu0iJgJHE326PwkeSJgRo26B3AKeUL2oKaOzWtnuDHeIyQdKOlPZBePx4BTy6+q\nfYZtZAygb6UcUbHOEeTZvekV6+xGxnPJA6MmN1wAG5WffV0tpwP/R16FWVvSmhUzju6XcQw5jxtW\nyOjMaXJe2rIR+R17oOXcWcCzW87sJeeSJ5BeWU6Y7EY2ZnvVKHIwwgUMn+OCuWTXSwAiYm75Z1OP\nu5tFPtqwlg3JdX+xLvURsZDsLln1dqgOD5afNevv21c+uKw3DnWlMfQmYCowRtKdkg7o1ZwWBO08\nOnO45VwKvErSC8rFsd0YfBf1Pr8Cdin3je9B3s73C2A3SX233TxK9piq5SrgSXL/cusy3rvSJL0V\n+CjwqYi4s1cyfM94D5D0MuBnwA3kvV2zgbcDH+yljKXouwJXs8H5GLnR3Kxind3ImEVeBWn6KuWs\n8vOHwC/7/7LSFb++jDPJbl39d2gzK2R05jQ5L23puxpe80TV8vDI3Ut3Cfl9PQQYR3ZZu6yrUzQ4\nd5K3PvwSOJ/c9ve6gQ6amzqQrr3O9G37F9u/SFqDPElXa3vZX18j/JGKdfbtK5s6gdCqiLgLOEDS\n1uQYARdJ2jEibu7BnKfJEyVNmUM7+6/hlvML4Hvk2DAPl7KLK9V9CTkOwZvJhvfV5H5sA7IHy17A\n5VH6Y1fyPXJciIXA2Z
JeFxGLKtaPpGcDPwDGRsQPatbddMZwOQM+3G1DHkB8PiLOj4hfkWeyei1j\nIEeSO+pra1VYzipPBt6thkZqbyljPnnP+Hsk1bpyvKSc6cDdwIsj4ur+rwYyrllCTpVGchvz0pbS\nNf124L3lILyPH3XTReXq5E+AQ8lnzd8WEdO6OlGDFBFjyG6E+0n6aoNRTwO91DulK8q2fzxwcOnm\n3edQsvHU1LbsEHJ/PK5WhWVfeSNwUL/RmXt6O1Yay4eRx9I79WjODBa/naO2e8knjlS5DW1VyYmI\n2WSj+X3kOnllRFQ5QRYRk8nBLT9Mfp+uLLeP3EgOELkDecGkCknvAA4gB9Y8lBw87rO16u9wInmi\npJGnDTWZ4SvjvWEyeQDzXUk/Jc/M7dZCxq5L/5OVtrmko4GngF3IlfTsiLitcs5xZHecyZLOI8/0\nvxQ4Kuo9F7qNjGPIg7L/LY+1eJjsvlbbccAFkm4gR/OcTXZdvSIiJlbKOJY8u38j2eVqFjlC6ISI\nqDkSdRvz0pYTyW7RE8qopFuQB2bWXaeRt1wcQn6ve15EXChpR+A4SVdFRBMnZO8mu0h+OiJObqD+\n4eQzwO+ASZLOIUcd/xgwPiJqPUJzO+Wjmm4kT8p/CLgsIn5fqf4+XyG7qk4oec+hgaeCNE3SNmQv\nkknkVb59yJMXVY9f2sohL4IcL+k75Kj9L4uIIyrW/yOyp831Zf/1BPkUndr658xrKaep+QH4MfmE\njkXAfpXrPhf4PHB3x7H3xcC3yRM0VY7Hysm3k4EfR8RNpew0ciyE8yLi/ko52wCHk9ux3dXxlMmI\nOHPIZzQxypxfjYzctz85mu7j5Mr/F3IjOqJyxi3kvXZPkPeOzwI2qphxPDn64CPk/YlTyTNkami5\n7UKOSPs4uXGeBGzcgxmvAcaQjcqnyPugxwL/UjnnbWSPiLnls58MvKVyxt7k7RCPk90Xfw/s38Bn\n3zkvi8r3bc/KGePJx39UnfYl5HwQmFa+X7eSg94taGoegG8CTzc0LzcD41pYZpcAUxvOOIu8D25U\nC/NzC3BtA/Uu9vmT91lPIw/6V28gbwvyqut9Tc1DKbsXuKBf2TPAfzaU18g6Qw4OOKHsk6eTB8uD\nGlG533zcX/Ylj5IH4WfQ8eSGyvPyb2V7/wR5a8TlNDOaemPbZPLxTxPLfmVeWU8O7+Gc9YH/IS/A\nzCJHvV+/csbhZZvyZNnv30Y+PrH2vHTmzCYvME1sOKex+SlZ02jmqSBbluXztY6yzcgTP/9VMeeY\nsoxGdpRtWLY3Z1fMeX/ZlvzTqxcyVALMFiPpleRo1HtExB3dnh6zlVW6908EPhE91k19IJJ+AWwb\nEdt2e1qGKkmzgd0iu+Q1lTEWmBERhzSVUXJENsSuaTrLVh2SxgPrRcRru5Q/mhxrYdeI+E03psGG\nJ0mHkVd+d4yIh7o9PWZL427qtkSRXdQ27/Z0mA1WRMwiu1/2JEkHAq8jr2CJHGBlf7KLtA0gImqO\nbP13ZUTj1cjP4PXA9k3kdOQdRd7DN5LsHWM2nPT0feM2NEXET8hxPcyGPDfGzcyGtvXI0U1fRG6z\n7yKv8p/ezYlahX2OHBdiCrB3RNzTVFDp1XEi2eXypIg4r6ksW2V1u3tkt/PNzLrK3dTNzMzMzMzM\nWuZHm5mZmZmZmZm1zI1xMzMzMzMzs5a5MW5mZmZmZmbWMjfGzczMbEiRtFa3p8HMzKxpboybmZlZ\n4yQdKekhSZst431Tge8voXw/SbMkvbqxiTQzM2uRG+NmZma2VJJOkDRb0iJJCyX9TdLlkvZcgWrW\nAdZl2Y9VHTFA+drl79dcgUwzM7Mhy41xMzMzW5bNAQGHA0eQV663BMZKOmx5KoiIbwLPjYgHVmYC\nIuJCYN2I+N3K/L2ZmdlQ4+eMm5mZ2VJJOgsYHRGjOsrWAiYBm0XEphWzZgCXR8RyNfLN
zMx6la+M\nm5mZ2QqLiCeBMcBISaMk7SDpD5IekzRP0jhJL+97f7lnfJGkF3SUbSHp4tIF/iFJPye7oy9G0qGS\n/iRpgaSpkg5pZSbNzMwatKz7tszMzMwGsiWwCJgFbAxcC9wLbAIcDfwMeGl5b5QXAJLWAK4DRgKn\nA48Ce5P3hdPxvo+V348FfgTsC/y3pIci4uqmZszMzKxpboybmZnZ8pCkTcgB1EYB7wIOBs6LiIXA\n7WQDvO/Nc4D/kLR1RNy1hPreD2wF7BkR40rZyaWbel8dawAnAOMiYp9S9h1gGvAJwI1xMzPrWW6M\nm5mZ2fLYCJjer+wi4KOdBZJWB9YHppSiTYAlNcZ3Bx7qaIgvyXbk6OqXlxMBkAPJTQT2WKGpNzMz\nG2LcGDczM7PlMRt4Z/n3YcDbgRMjYh6ApF2Ak4FXkw3m+eW9A41PMwKYMcDv+mxUfn4LOLXf7xYu\n74SbmZkNRW6Mm5mZ2fJYGBHXAEiaBLwBOFvSa8iG9RXAPcAHgJnAjsBxS6lvJrD9MjJnlZ8nATf0\n+4U/kQkAAAFKSURBVJ0fB2NmZj3NjXEzMzNbIRHxuKTDgSuBfwd+Qw689o2IOBdA0jzg+KVUczXw\nLknvjYhzBnjPrcAc4EUR8YVqM2BmZjYEuDFuZmZmKywixki6Avgi8Aqy0fxlSc8HHillS7t6fS7w\nceBMSTsBvwc2ANbryFgg6cvAKZI2AsYBc8lR3H8WEVP+qVYzM7Me4ca4mZmZLY8lNayPJRvRRwJv\nA04ku6avRT6q7LfAw0usLGKhpN3I0dLfQd6HPh+4E5jQ8b5TJc0kr8B/FXiS7A5/aZW5MjMz6xJF\n+JYrMzMzMzMzszYNNMKpmZmZmZmZmTXEjXEzMzMzMzOzlrkxbmZmZmZmZtYyN8bNzMzMzMzMWubG\nuJmZmZmZmVnL3Bg3MzMzMzMza5kb42ZmZmZmZmYtc2PczMzMzMzMrGVujJuZmZmZmZm17P8BChHp\nAIicgVsAAAAASUVORK5CYII=\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x7f14a5dafa20>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"letters.sort_values('letter').set_index('letter').plot.bar(rot=0)\n",
"plt.xlabel('Raidė')\n",
"plt.ylabel('Dažnumas')"
]
},
{
"cell_type": "code",
"execution_count": 179,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>word</th>\n",
" <th>freq</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2201</th>\n",
" <td>ąžuolas</td>\n",
" <td>2714</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6840</th>\n",
" <td>ąžuolo</td>\n",
" <td>796</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8926</th>\n",
" <td>ąžuolų</td>\n",
" <td>596</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12834</th>\n",
" <td>ąžuolai</td>\n",
" <td>394</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18890</th>\n",
" <td>ąžuolynas</td>\n",
" <td>253</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24815</th>\n",
" <td>ąžuolynė</td>\n",
" <td>184</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27016</th>\n",
" <td>ąžuolinis</td>\n",
" <td>166</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27776</th>\n",
" <td>ąžuolinė</td>\n",
" <td>160</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28319</th>\n",
" <td>ąžuolyno</td>\n",
" <td>157</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38602</th>\n",
" <td>ąžuolynai</td>\n",
" <td>107</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39164</th>\n",
" <td>ąžuolytė</td>\n",
" <td>105</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41303</th>\n",
" <td>ąžuoliukas</td>\n",
" <td>98</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43404</th>\n",
" <td>ąžuolija</td>\n",
" <td>92</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44040</th>\n",
" <td>ąžuolą</td>\n",
" <td>90</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48267</th>\n",
" <td>ąžuoliniai</td>\n",
" <td>80</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50930</th>\n",
" <td>ąžuolu</td>\n",
" <td>75</td>\n",
" </tr>\n",
" <tr>\n",
" <th>52253</th>\n",
" <td>ąžuolais</td>\n",
" <td>73</td>\n",
" </tr>\n",
" <tr>\n",
" <th>56097</th>\n",
" <td>ąžuolynų</td>\n",
" <td>66</td>\n",
" </tr>\n",
" <tr>\n",
" <th>67552</th>\n",
" <td>ąžuolyne</td>\n",
" <td>52</td>\n",
" </tr>\n",
" <tr>\n",
" <th>82624</th>\n",
" <td>ąžuolpamūšės</td>\n",
" <td>41</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" word freq\n",
"2201 ąžuolas 2714\n",
"6840 ąžuolo 796\n",
"8926 ąžuolų 596\n",
"12834 ąžuolai 394\n",
"18890 ąžuolynas 253\n",
"24815 ąžuolynė 184\n",
"27016 ąžuolinis 166\n",
"27776 ąžuolinė 160\n",
"28319 ąžuolyno 157\n",
"38602 ąžuolynai 107\n",
"39164 ąžuolytė 105\n",
"41303 ąžuoliukas 98\n",
"43404 ąžuolija 92\n",
"44040 ąžuolą 90\n",
"48267 ąžuoliniai 80\n",
"50930 ąžuolu 75\n",
"52253 ąžuolais 73\n",
"56097 ąžuolynų 66\n",
"67552 ąžuolyne 52\n",
"82624 ąžuolpamūšės 41"
]
},
"execution_count": 179,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"words[words.word.str.startswith('ąžuol')].head(20)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.1+"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment