Skip to content

Instantly share code, notes, and snippets.

@PhanDuc
Created May 31, 2018 15:08
Show Gist options
  • Save PhanDuc/cd7105991bed8ce01352ccb6de5f4eab to your computer and use it in GitHub Desktop.
Save PhanDuc/cd7105991bed8ce01352ccb6de5f4eab to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\ProgramData\\Anaconda3\\lib\\site-packages\\gensim\\utils.py:862: UserWarning: detected Windows; aliasing chunkize to chunkize_serial\n",
" warnings.warn(\"detected Windows; aliasing chunkize to chunkize_serial\")\n"
]
}
],
"source": [
"%matplotlib inline\n",
"from comparison import *\n",
"from IPython.display import Image\n",
"from gensim.models import KeyedVectors\n",
"from scipy.spatial.distance import cosine"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### K-mer generation"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"k = generate_kmers(100, [4,5,6,7])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Needleman-Wunsch distance"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"d = get_distances(k, nw_distance)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### dna2vec distance"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"fp = \"dna2vec-20161219-0153-k3to8-100d-10c-29320Mbp-sliding-Xat.w2v\"\n"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"mk_model = KeyedVectors.load_word2vec_format(fp, binary=False)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"vectors = [mk_model[a] for a in k]"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"100"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(vectors)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(100,)"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vectors[0].shape"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(100,)"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vectors[1].shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To compute your distance, write a function that returns it and feed into get_distances. Then use compare_spearman to compute Spearman's rho and p-values."
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"d2v = get_distances(vectors, lambda a,b: 1 - cosine(a,b))"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(0.48156159086731043, 7.818472421313102e-286)"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"compute_spearman(d,d2v)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEKCAYAAAD9xUlFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3Xt4VNXZ///3nQQTCAQEFHhAxQNoMB4iCCqpElCUnz4VW7QNWqxEEIW0FkGRVKr2yqNQQv2KHIrG1mNarS2CFdFCQAFRxAPmUA4KIioqiGBECEnu3x+zM52EHPaE2dmZcL+ua67M7Nmz8kkgc89ee+21RFUxxhhjAGL8DmCMMab5sKJgjDEmyIqCMcaYICsKxhhjgqwoGGOMCbKiYIwxJsiKgjHGmCArCsYYY4KsKBhjjAmK8ztAuDp37qw9e/b0rP3vv/+exMREz9r3muX3TzRnB8vvN6/zr1+/fpeqHtfQflFXFHr27Mk777zjWfsrVqxg0KBBnrXvNcvvn2jODpbfb17nF5FP3Oxn3UfGGGOCrCgYY4wJsqJgjDEmyIqCMcaYICsKxhhjgqwoGGOMCbKiYIwxJsiKgjHGmKCou3jNGGOimYiE/RpV9SBJ7awoGGNME6rrDV5EmvTNvy7WfWSMMSbIioIxxpggKwrGGGOCrCgYY4wJsqJgjDEmyIqCMcaYIE+LgohcISIbRWSLiEyp5fkTRaRARN4TkQ0i8v95mccYY0z9PCsKIhILzAGGAX2ADBHpU2O33wLPqWoq8HNgrld5jDHGNMzLI4X+wBZV/VhVy4C/AlfX2EeBJOd+e+BzD/MYY4xpgJdXNHcHPg15vAMYUGOfe4FXRSQLSAQu9TCPMcaYBnhZFGqb4KPmNdwZwF9UNVdELgSeEpEUVa2s1pDIWGAsQJcuXVixYoUXeQEoLS31tH2vWX7/RHN2sPzNQbPIr6qe3IALgaUhj+8G7q6xTxFwQsjjj4Hj62u3b9++6qWCggJP2/ea5fdPNGdXtfx+C7wde9r+O+rivdvLcwrrgF4icrKIHEPgRPKiGvtsB4YAiEgykAB87WEmY4wx9fCsKKhqOTABWAqUEBhlVCQi94vIj53d7gDGiMgHQD7wS6eiGWOM8YGnU2er6svAyzW2TQu5XwwM9DKDMcYY9+yKZmOMMUFWFIwxxgRZUTDGGBNkRcEYY0yQFQVjjDFBVhSMMcYEWVEwxhgTZEXBGGNMkBUFY4wxQVYUjDHGBFlRMMYYE2RFwRhjTJAVBWOMMUFWFIwxxgRZUTDGGBNkRcEYE9Xy8/NJSUlhyJAhpKSkkJ+f73ekqObpIjvGGOOl/Px8srOzycvLo6KigtjYWDIzMwHIyMjwOV10siMFY0zUysnJIS8vj/T0dOLi4khPTycvL4+cnBy/o0UtKwrGmKhVUlJCWlpatW1paWmUlJT4lCj6WfeRMSZqJScnc99997Fw4UJKSkpITk5m+PDhJCcn+x0tatmRgjEmaqWnp/PAAw+we/duAHbv3s0DDzxAenq6z8milxUFY0zUWrhwIUlJSSQkJKCqJCQkkJSUxMKFC/2OFrWsKBhjotaOHTu4+OKL+eKLL1BVvvjiCy6++GJ27Njhd7SoZecUjDFRbfHixcycOZM+ffpQXFzMpEmT/I4U1exIwRgT1dq2bUtqaipxcXGkpqbStm1bvyNFNTtSMMZEtYSEBEaPHs0nn3zCSSedREJCAt99953fsaKWHSkYY6JWfHw8vXv3rnZOoXfv3sTHx/sdLWpZUTDGRK1LLrmE1atXM3r0aBYvXszo0aNZvXo1l1xyid/RopZ1HxljotZnn33G8OHDefzxx5k3bx7x8fEMHz6czZs3+x0tallRMMZErZKSEt577z1atWrFihUrGDRoEIcOHSIhIcHvaFHLuo+MMVErOTmZVatWVdu2atUqm+biCHhaFETkChHZKCJbRGRKHf
tcJyLFIlIkIs96mccY07JkZ2eTmZlJQUEB5eXlFBQUkJmZSXZ2tt/RopZn3UciEgvMAS4DdgDrRGSRqhaH7NMLuBsYqKp7ROR4r/IYY1qeqjUTsrKyghPi5eTk2FoKR8DLcwr9gS2q+jGAiPwVuBooDtlnDDBHVfcAqOpXHuYxxrQAIlLnc0VFRYwcOZKRI0dW266qXsdqMbzsPuoOfBryeIezLVRvoLeIrBaRtSJyhYd5jDEtgKrWemvoOeOOl0cKtZXzmv86cUAvYBDQA3hDRFJU9dtqDYmMBcYCdOnShRUrVkQ8bJXS0lJP2/ea5fdPNGeH6M8PWP4I8LIo7ABOCHncA/i8ln3WquohYKuIbCRQJNaF7qSqC4AFAP369dNBgwZ5lTk4rC1aWX7/RHN2iP78gOWPAC+7j9YBvUTkZBE5Bvg5sKjGPguBdAAR6UygO+ljDzMZY4yph2dFQVXLgQnAUqAEeE5Vi0TkfhH5sbPbUmC3iBQDBcBkVd3tVSZjjDH18/SKZlV9GXi5xrZpIfcVmOjcjDHG+MymuTDmKJefn09OTk5wnH92draN8z9ClZWVfPPNN2G/bteuXa737dSpU73DcxurwaIgIuOBZ6pGBInIsUCGqs6NeBpjTJPKz88nOzubvLw8KioqiI2NJTMzE8AKwxHIzc1l2rRpJCYmhvW6M844w9V+paWlzJs3j5tuuqkx8erl5khhjKrOqXrgXHk8BrCiYEyUy8nJIS8vj/T09ODoo7y8PLKysqwoHIH9+/dz5513ct9997l+TTijv8aPH8/+/fsbma5+bk40x0jIMYozfcUxnqQxxjSpkpIS0tLSqm1LS0ujpKTEp0TGb26OFJYCz4nIfAIXn40DXvE0lTGmSSQnJ3PdddexZMkSDh48SHx8PMOGDbNZRo9ibo4U7gKWA7cC44FlwJ1ehjLGNI3u3buzcOHCaiuXLVy4kO7da85IY8LRo0cP3n//fc/af++99+jRo4cnbTdYFFS1EvgLkK2qP1XVP6lqhSdpjDFNauXKlVx//fW8/vrrXH311bz++utcf/31rFy50u9oUS0jI4M333yTjRs3RrztNWvWsHPnTq666qqItw0uioJzodn7OF1GInKuiNS8MtkYE4UOHjzIggULKCwsZNmyZRQWFrJgwQIOHjzod7So1qZNG2699Vb++Mc/Rrzt3NxcJk6cSGxsbMTbBnfdR78jMA32twCq+j7Q05M0xpgmFR8fz/z586ttmz9/PvHx8T4lajnGjx/P3/72N776KnIrAmzZsoXXX3/dk6GoVdwUhXJV3etZAmOMb8aMGcPkyZPp2rUr6enpdO3alcmTJzNmzBi/o0W9448/nmuvvZa5cyM3ev+Pf/wjY8eODfv6h3C4KQqFIjISiBWRXiIyG1jjWSJjTJO56KKLiI2N5csvvwTgyy+/JDY2losuusjnZC3DxIkTmTt3bkSuKdi1axfPPvssEyZMiECyurkpClnAmcBB4FlgL3C7l6GMMU1jwoQJVFRUkJuby5IlS8jNzaWiosLzN56jxRlnnMEFF1zAk08+ecRtzZs3j5/85Cd069YtAsnq5qYonK6q2ap6vnP7raoe8DSVMaZJfPPNNzz44INMnDiRhIQEJk6cyIMPPtioeXtM7SZNmsSsWbOoqGj8oM0DBw4wZ84cJk70fu5QN0Vhloj8R0R+LyJnep7IGNOkvv76a1JSUhgyZAgpKSl8/fXXfkdqUX70ox/RoUMHFi9e3Og2nnrqKfr27cuZZ3r/FuzmOoV0Astlfg0sEJEPReS3XgczxngvJiaGmTNnsmvXLlSVXbt2MXPmTGJivFx/6+giIkyaNInc3NxGvb6yspJZs2YxadKkCCernat/eVXdqaoPE5ji4n1gWgMvMcZEgdatW6OqlJWVISKUlZWhqrRu3drvaC3KT37yE3bs2MHatWvDfu3LL79MmzZtmmypTjcXryWLyL0iUgg8QmDkkTfXVxtjmt
T3339Pz5492bNnD5WVlezZs4eePXvy/fff+x2tRYmLi+M3v/lNo44WZs6cyaRJkzxZO6E2bo4U/gzsAYaq6iWqOk9VI3c1hjHGV9u2baNr167ExMTQtWtXtm3b5nekFmn06NEUFBTw0UcfuX7NunXr2Lp1KyNGjPAwWXUNzpKqqhc0RRBjjD9EhMmTJ9OnTx+Ki4uZNGkSgZVyTSS1bduWyy+/nFOfOq/W5wcBrKi+7Xzgk5uAVq08zRaqzqIgIknA3QS6il5W1fyQ5+aq6m1NkM8Y47GkpCRmz57NJ598wkknnURSUhJ799okBpGmqqxZs4YPp6zirLPOOuz52hbZWb58OVOmTOHte5smI9TfffRnQIAXgAwReUFEqiZEsaMHY1qIPn368MUXX6CqfPHFF/Tp08fvSC1ScXExlZWVpKSkuH5NWloamzdvZufOnR4mq66+onCqqk5R1YWq+mPgXWC5iHRqomzGGI8lJiby5ptv0qZNG0SENm3a8Oabb3o6t87R6qWXXuJ///d/wzphfMwxxzB06FBefvllD5NVV19RiBeR4POqmgMsAF4HrDAY0wJUzYa6Z88eVJU9e/ZU224i56WXXmrUGghXXXUVL730kgeJaldfUVgMDA7doKpPAHcAZV6GMiaa5OfnV7siOD8/v+EXNRNV01lUzc1f9dWmuYis3bt3s2HDhkZdazBs2DCWLVvWZGtc1HmiWVVrXXJTVV8BenmWyJgokp+fT3Z2Nnl5eVRUVBAbG0tmZiYQWH0rGsTExATn5amoqCAmJobKykqfU7Usr7zyCunp6SQkJIT92s6dO5OSksLKlSsZOnSoB+mqc3VFs4hcKSJ3isi0qpvXwYyJBjk5OeTl5ZGenk5cXBzp6enk5eWRk5PjdzTXKisradeuHTExMbRr184KggcWL158RMtnXnXVVUc0d1I43FzRPB/4GYEptAW4FjjJ41zGRIWSkhLS0tKqbUtLS6OkpMSnRI3Tpk0bVJU2bdr4HaXFOXToEEuXLuXKK69sdBtV5xWa4voRN0cKF6nqKGCPqt4HXAic4G0sY6JDcnIy1113HQkJCcHugeuuu47k5GS/o4VFRII3E1mrV6/mtNNOO6J1EFJSUlBViouLI5isdm6Kwg/O1/0i8j/AIeBk7yIZEz26d+/OwoULg5+w27Rpw8KFC+nevbvPyepWWwHYuXMnlZWV1cbDW6GIjMaOOgolIk02CslNUXhJRDoAfyBwrcI24K9ehjImWixfvpzExETat29PTEwM7du3JzExkeXLl/sdrU6qGrxVXY9w7LHHVvuamJhYbT/TeJEoCtB0Q1PdrKfwe1X9VlVfIHAu4QxVvcfzZMZEgfLycm677bbgm2tiYiK33XYb5eXlPidz59FHH6V169bB6xP27NlD69atefTRR31O1jJs3ryZffv2kZqaesRtDRo0iA0bNrB79+4IJKtbgxPiAYjIRUDPqv1FBFU98kVHjWkBHnvsMV544YXgkNSf/vSnfkdyrWrYbE5ODkVFRZx55plkZ2dHzXDa5u6ll17iyiuvjMiiRVXnrZYsWcINN9wQgXS1czP66ClgJpBGYNK+84F+bhoXkStEZKOIbBGRKfXsN0JEVERctWtMcxETE8O3335LRkYGl112GRkZGXz77bdRtXJZRkYGhYWFABQWFlpBiKBIdR1VaYouJDdHCv2APhpmx6KIxAJzgMuAHcA6EVmkqsU19msH/Ap4K5z2jWkOqsb0f/nll9W+Wj+82bt3L+vWrePSSy+NWJtXXnklkydP5tChQ7TyaDptNx9nCoGujWi7P7BFVT9W1TICJ6evrmW/3wMzgAON+B7G+CouLo7ExER69uxJTEwMPXv2JDExkbg4Vz2zpgV79dVXSUtLi+jkgt26deO0005j9erVEWuzJjdFoTNQLCJLRWRR1c3F67oDn4Y83uFsCxKRVOAEVW262Z6MiaDy8nLKysrYtm0blZWVbNu2jbKysq
g50Wy8E+muoypedyG5+ThzbyPbrm1wc/CY2pmB9Y/ALxtsSGQsMBagS5curFixopGRGlZaWupp+16z/E3v0KFDtG3bltLS0uBXIOp+DojOzKGaS/6KigpefPFFhg0b5jqT2//7Xbt2JS8vz5OCA1QfsxzJG4Ern5eGPL4buDvkcXtgF4HrHrYR6D76HOhXX7t9+/ZVLxUUFHjavtcsf9MCtHXr1rp8+XJ97bXXdPny5dq6dWsN/GlFl2jMHKo55V+9erWeddZZYb3G7f/9iooK7datm27cuDGs9oF31MV7d33LcX5HyCf7WopJUgP1Zh3QS0ROBj4Dfg6MDHn9XgJdU1XfbwUwSVXfaaBdY5qVgwcPMnjwf2eZj6aRR8YbK1eujOgJ5lAxMTFceumlrFy5kt69e0e8/fqmzm4HICL3AzuBpwh0CV0PtGuoYVUtF5EJwFIgFnhcVYuc9t5RVTfnJYxp9mrOKmqzjJqysjLatWvwbbLR2rVrR1mZN8vauDmncLmqDgh5PE9E3iIwYqheqvoy8HKNbbVOu62qg1xkMcYY4yE3RaFCRK4nMKRUgQygwtNUxpij2r59+1i8eHHY13s8/fTTrvcdMGAAvXrZemE1uSkKI4H/59wUWE3IuQFjDLRu3ZoDBw6QkJDADz/80PALTL1ee+01pkyZwiWXXBLW61555RVX+23atIl+/foxd+7cxsRr0RosCqq6jdovOjPGOKoKgRWEyOnfv39Yn/xvvvlm12sgz5s3jw0bNjQyWcMef/xxtm/fzr///W/Xr9m7dy/t27d3te9HH33EWWed1dh49bLLLo0xJsKeffZZtm/fzoknnuj6NWlpaaxatcr1/ueee25jojXIioIxxkTYwIEDGThwYKNe5zcbUG2MMSaovovXJtb3QlWdFfk4xhhj/FRf95F3V14YY4xpluq7ovm+pgxijDHGfw2eaBaRBCATOBNIqNquqqM9zGVM1IiNjaWioqLOx8ZEEzcnmp8isMjO5cBKoAfwnZehjGnORCR4Aw4rAFWPa+5nTDRwUxROU9V7gO9V9QngSsCbqybMUSs/P5+UlBSGDBlCSkoK+fn5fkeqU+g0w1Vv+Mcee2y1ryJScyp5Y6KCm6JwyPn6rYikEFgHoadnicxRJz8/n1tuuYVNmzZRWVnJpk2buOWWW5p1Yagyfvx4RIQ9e/YAsGfPHkSE8ePH+5zMmMZxUxQWiMixwD3AIqAYFzOkGuPWhAkT2L9/Pw8++CBLlizhwQcfZP/+/UyYMMHvaA2aPXs248ePJz4+HoD4+HjGjx/P7NmzfU4W3RITE3n33XcpKCiIeNtbtmzhn//8Z0TXTm5R3KzE05xutvJa/aIxP6AzZsxQ1f/mnzFjRrNaScuNaMtbU3PKX1FRoc8884yeeuqpOmTIEF27dm2Dr2no//6nn36qY8eO1U6dOun999+v+/bti1DayPD694/LldcaPFIQkXgRGSkiU0VkWtXN21JljjYpKSn1PjZHl5iYGEaOHElJSQk/+9nPGDFiBMOHD+fDDz8Mu62vv/6aiRMncs4553DssceyceNG7rnnHk8XwYlmbrqPXiQwS2o58H3IzZiIiIuL49prr+Xkk09myJAhnHzyyVx77bXExdnUXEe7Vq1aMWbMGDZv3sxZZ53FOeecw5w5c1y/fv369ZxyyimUlJRQWFjIgw8+SKdOnTxMHP3c/NX1UNUrPE9ijlqDBw/m1Vdf5ZhjjqGyspK9e/fy/fffM3ToUL+jmWbg888/Jzc3lz//+c9kZmYy/uupcO/Uw/YbBLCi+ra+wHd3xNBl3rtMmDCBqVOn0rdvX+9DRzE3RWGNiJylquEftxnjwmeffcbw4cNZsmQJAPv372f48OFs3rzZ52TGT1u3bmX69Ok899xzjBo1ig0bNtCjRw/g0Vr3X7FiRZ3rKXw8+XseffRRrr76alJSUsjOzuZHP/qRd+GjmJvuozRgvYhsFJENIvKhiHi3Oo
U56pSUlNCtW7dq27p160ZJSYlPiYzf3nrrLfr27UunTp3YuHEjDz30kFMQGicxMZHbb7+djz76iBEjRnDjjTcyY4YNoqyNmyOFYZ6nMEe1Dh06sGDBAmbMmEGfPn0oLi7mzjvvpEOHDn5HMz7ZsWMH6enp5OTkRLTd+Ph4br75Zg4dOuTpymvRrL6ps5NUdR82pYXx2L59+0hKSiI1NZWKigpSU1NJSkpi3759fkczJuLqm/akrue0Ca+Kr6/76Fnn63rgHefr+pDHxkREeXk5ubm5ZGVlcfnll5OVlUVubi7l5eV+RzMm4uq6PqCgoKDO55pSfVNnX+V8Pbnp4pijRc1PRKNH/3fS3aKiouDj0P2a+o/DmKORm4vXzqvldqqI2CBy02ihn4ImTJhAXFwcubm5AOTm5hIXF8eECRNsUjljmpibN/a5wHnABkAIzJD6AdBJRMap6qse5jMu5efnk5OTQ0lJCcnJyWRnZ5ORkeF3LFeq5gmaOnVq8Ou4ceNs/qAj8MMPP/DAAw9QVlYW1uumTJniet++ffty7bXXhhvNNHNuisI2IFNViwBEpA8wGfg98A/AioLP8vPzyc7OJi8vj4qKCmJjY8nMzASIqsIwe/ZsRIQDBw74HSfqbd++nblz5zJp0qSwXud2xNfWrVt5+OGHrSi0QG6KwhlVBQFAVYtFJFVVP7bFQ5qHnJwc8vLySE9PD17Ak5eXR1ZWVtQUBRN5nTp1CuuT/wUXXFDnxV81vfHGG8EjO9OyuCkKG0VkHvBX5/HPgE0iEs9/11owPiopKSEtLa3atrS0NLv4yxgTNjdXNP8S2ALcDvwG+NjZdghI9yqYcS85OZlVq1ZV27Zq1SqSk5N9SmSMiVYNHimo6g9ArnOrqTTiiUzYsrOzyczMDJ5TKCgoIDMzM+JXgxpjWr76rmh+TlWvE5EPgcPGA6rq2Z4mM65VnTfIysoKjj7Kycmx8wnGmLDVd6Twa+frVY1tXESuAP4fEAs8pqoP1nh+InAzgbUavgZGq+onjf1+R5P6TvIXFRUxcuRIRo4cWW27jfU3xjSkviuavxCRWCBPVS8Nt2HntXOAy4AdwDoRWaSqxSG7vQf0U9X9InIrgbWffxbu9zoa1fUGLyL25m+i3rJly/jHP/7Beeed5/o1paWltG3b1tW+X3/9Nddcc01j47Vo9Z5TUNUKEdkvIu1VdW+YbfcHtqjqxwAi8lcCK7gFi4Kqhq7KvRa4IczvYYxpgSZPnszgwYM55ZRTXL+mb9++rF+/3vX+4bR9NHEzJPUA8KGIvEbIMpyq+qsGXtcd+DTk8Q5gQD37ZwJLantCRMYCYwG6dOnCihUrGk7dSKWlpZ623xQsv3+aS/bt27fzww8/hJUnnP/7GzZsYO/evZ7+vJ07dw57ptxw9n///ffDjeSp5vLe46Yo/Mu5hau2Tu9a+zVE5AagH3BJbc+r6gJgAUC/fv3U7QU2jVHf6k3RwvL7p7lk37hxI61btw4rTzj/92NjY2nfvn2z+XmrNLc84Wgu7z1uisLfgNMIvKF/pKpu5yDYAZwQ8rgH8HnNnUTkUiAbuERVD7ps2xhjjAfqvHhNROJEZAaBN/cngKeBT0Vkhoi0ctH2OqCXiJwsIscAPwcW1fgeqcCfgB+r6leN/SGMMYerqKigsrLSk7ZtrYuWq74jhT8A7YCTVfU7CKzGBsx0br+u57WoarmITACWEhiS+riqFonI/cA7qrrI+R5tgeedIZbbVfXHR/gzGRMRr776Krt37w7rNfn5+a737dKlC4MHDw43lisdO3YMfj3//PPp378/AwYMoH///nTt2jWstioqKiguLuatt97i7bff5u2332bz5s3V1sAwLUd9ReEqoLeGjG9U1X3O0NH/0EBRcPZ/GXi5xrZpIffDHupqTFO56qqruOaaa4iJcTMbTMCiRYsa3gkoKyujoKCAb775prHx6nXcccexZcsWvvrqKxYtWsTs2bP5v//7Pw
BGjhzJM88846qdL774gjPOOIN9+/YRFxfHddddx8MPP8yAAQOIj4/3JLvxV31FQbWWAe/OMFUbCG9aPFXl6aefplUrN72lcMstt7g+Ubhnzx5Ph0R+9913jB07ljVr1vDdd98xYMAArrnmGi644AKuWPszuLf9Ya8ZBLCi+rZuwN7fwJOnPMFbb73F2rVrueKKK+jTpw+jR4/mtttu8+xnMP6orygUi8goVX0ydKMzUug/3sYyxhyJzz//nDfeeIPly5fTq1ev6lfAX1H7JUf1jX4ZBYwaNQoILODz9NNP8+STT1pRaIHqKwrjgX+IyGhgPYHRR+cDrYGouRRQVeudEqK5tm3MkUpMTKR3794Rb7d169acccYZEW/XNA/1TXPxGTBARAYDZxK47mCJqi5rqnBHav78+dx6662kpKRwxRVXMHToUI477jgAUlNTw27vvffe46uvvmLp0qW88sorFBcXk5eXZyfcjDEthpups5cDy5sgS8R169aNLl26UFhYSGFhITNnzgw+17FjR0444YTDXvPBBx9wzjnnVNu2fft29uzZc1ghqWrfGGNaCjcXr0WtkpISevXqxTXXXMPSpUvZunVr8LndWeXA1sNfNDyp9u0kIfcFLqE/9dRTGTp0KF26dKFXr16eZDfGGD+06KKwatWq4A2ge/fuXH755fTo0YPf1TF+6v7772fatGm1PvfLX25n6dKlfPTRR8ybNw+ApKQkT/ptW4IXX3yR//wn/DEJ06dPd7VfQkICEyZMIDY2NuzvYYypXYsuCnl5eWzZsoX+/fsfNqywvhPE999/f63bQ0foHjp0iLVr13L66adHJmwL9Lvf/Y5zzz037C42t2P358yZw4gRI+jevXtj4hljatGii0KXLl3qfEOqa80Bt5NStWrVih/96EdHEq9BO3fuZMuWLWG/ruZ6zXWJiYnh/PPPdz0OvzFuv/12zj33XNf7Dxs2zPVY/6effrqRqdzbu3cvnTt3jmibqsq3334b0TaNiZQWXRSi3V133cWbb77J8ccfH9brpkyZ4mq/wsJCXnjhBYYMGdKYeC3eRRddxCmnnEJMTAw9e/YM3lJTU7nxxhtdt7NgwQKKiorYtm0b27ZtY+vWrYgIAwfmEaN4AAARiUlEQVQO9DC9MY1jRaEZq6ioYNq0adxwg/u1h8KZfnfo0KFUVFQ0Ml3Ld/DgQSorK6moqKC0tJRdu3bRqlUrHurwZ7j38OVEBsFhVwRDYCGQEZ9dxq5du/juu+84dOgQcXFxlJWVeZZ93759bNq0ieeee871a4qKivjqK3fzUpaUlDQ2mmnmrCgYU4f169ezc+dOOnbs6OoixfoK8t9D7qsq27dvD6tbLVxVef/+9783sOd/Pf/881x77bWu9x8xYkTYuUzzZ0XBmHokJSVF/Kp1ESEpKSmibdbUr1+/sNfqFpGwjixMy+R++kfT5Nq2bctf/vIXVq5cGfYfeH1KS0tZsGABGzZscL3QuTHm6GBHCs3YjBkzeOKJJ7j11lsREcaNG8eoUaNo3/7wGS7dKCoqYt68eTz77LNcfPHFPPHEE1x44YURTv1fbdu25YYbbuCCCy7gvPPOo2/fvpx99tm0bt067LYqKirYtGkT7777bvC2e/duEhISPEhuzNHLjhSasaSkJLKysigqKmLOnDmsWrWKnj17MmbMmLCGNK5atYpLLrmEyy67jI4dO/LBBx+wcOFCLr/8ck8n9Pv3v//NvHnzaNOmDffffz8XXHAB7dq1IzU1Naz8N998M+3bt6dPnz5kZmbyySefMGHCBD777DM6derkWX5jjkZ2pBAFqmZjbdeuHarK1q1b6fDQSbXuOwgOGwGTBqxMhzO+7EZCQoJnSzTWdOONN/Liiy9y4okn0q9fP04//XR69+7NmWeeGVb+x3rAY5NimVZ5D5s2bWLjxo2MGjWKTp068eGHHzb6yMkYczgrCs3Yjh07mDt3Ls888wzt27fnF7/4BUVFRf
VewVvXCBhVJe+yNTz99NP07duXlJQUbrjhBkaPHh3WymLh2LhxI6tXr6Zv376HP5kW/pz+odeZqyr/8z//Q2lpqRUFYyLIikIzds8991BaWsrixYs5++yzj6itqoulBg4cyEMPPcSSJUv49a9/zamnnkp6enqEEh/Oq3mJRMSzYlalvLyc9evXu77ie+PGjbRr187Vvvv27TuSaMZ4xopCM3bo0CGuvvrqIy4INcXHxzN8+HDmzp3LoUOHItp2SzN27FjXReHdd9/lvPPOc9324MGDGxvLGM9YUTCmDo0Z579+/XqP0hjTNGz0kTHGmCArCsYYY4Ks+6gZe+ONN/jyyy/DmrSupKSETz75xNW+n3/+eWOjufLBBx9wxx13hDXHz6effsrixYtd7bt3b+0jmIwxjWdFoRnr2LEj27ZtY/ly90tkP/nkk4waNcrVvueffz7JycmNjdegSy+9lPPOOy+sqb9nzZpVbS3t+kyfPp2uXbs2Np4xphYSyTl1mkK/fv30nXfe8az9cKaebo5EJKLzJDW1aM4fzdnB8vvN6/ceEVmvqv0a2s/OKRhjjAmy7iNjTFSpb76uup6L5iOIpmZHCsaYqKKqtd4KCgrqfM64Z0XBGGNMkBUFY4wxQZ4WBRG5QkQ2isgWEZlSy/PxIvI35/m3RKSnl3mMMcbUz7OiICKxwBxgGNAHyBCRPjV2ywT2qOppwB+B6V7lMcYY0zAvjxT6A1tU9WNVLQP+ClxdY5+rgSec+38HhoiXS4EZY4ypl5dFoTvwacjjHc62WvdR1XJgL2DrKxpjjE+8vE6htk/8NceGudkHERkLjAXo0qULK1asOOJwdSktLfW0/aZg+f0TzdkhuvNH+99uc8nv2TQXInIhcK+qXu48vhtAVR8I2Weps8+bIhIH7ASO03pC2TQX9Yv2S/2jOX80Z4fozx/tf7tHwzQX64BeInKyiBwD/BxYVGOfRcCNzv0RwPL6CoIxxhhvedZ9pKrlIjIBWArEAo+rapGI3A+8o6qLgDzgKRHZAnxDoHAYY4zxiadzH6nqy8DLNbZNC7l/ALjWywzGGGPcsyuajTHGBFlRMMYYE2RFwRhjTJAVBWOMMUFWFFqIrKwsEhISAEhISCArK8vnRKa5EpFabw09Z44OVhSiVM0/2kceeYSDBw8CcPDgQR555BH7wza1skVqTH2sKESp0D/Y+Ph4cnNzq/1h5+bmEh8fb3/YxpiwWFFoAQ4ePMi4ceOqbRs3blzwyCEa5Ofnk5KSAkBKSgr5+fk+JzLm6GRFoQWIj49n/vz51bbNnz+f+Ph4nxKFJz8/n+zsbGbPng3A7Nmzyc7ObraFwfrkTUvm6RXNpmmMGTOGu+66C4A+ffowa9Ys7rrrrsOOHpqT2t4oBw8eXO3ryJEjGTlyZPD55tIFVleOaJ+QzRiwI4UWYfbs2YwbN46pU6cybNgwpk6dyrhx44KfvJuj0HMdMTExlJWVVTsnUlZWRkxMTFScE6nq+hoyZEhUdn1VjVxLT0+3kWvGjhRaitmzZzN79uyo/LSanJzMqlWrSE9PD25btWoVycnJPqZyp6rrKy8vj4qKCmJjY8nMzAQgIyPD53QNy8rKYv78+UyfPp0+ffpQXFwcPOpszh8qjHfsSMH4Ljs7m8zMTAoKCigvL6egoIDMzEyys7P9jtagnJwc8vLySE9PJy4ujvT0dPLy8sjJyfE7miuPPvoo06dPZ+LEiSQkJDBx4kSmT5/Oo48+6nc04xM7UjC+y8jIYM2aNQwbNoyDBw8SHx/PmDFjouKTdklJCWlpadW2paWlUVJS4lOi8NQ1cu2OO+7wKZHxmx0pGN/l5+fzr3/9iyVLlvDaa6+xZMkS/vWvf0VF33xV11eoaOn6gugfuWYiz4qC8V00d8FEc9cX/Hfk2qxZszhw4EBw5NqYMWP8jmZ8Yt1HxnclJSU8//zz1bqPRo8eHRVdMFVdXFlZWZSUlJCcnExOTk5UdH3Bf0
8mT506Nfi7b+4j14y3rCgY33Xo0IE//elP/OEPfwiOgJk8eTIdOnTwO5orGRkZZGRkROXIL4jukWsm8qwoGN/t27ePDh06kJqaSkVFBampqXTo0IF9+/b5Hc2Yo44VBeO78vJycnNzq3XB5ObmctNNN/kdzZijjp1oNr6Lj4/nm2++obCwkGXLllFYWMg333xjI2CM8YEdKRjfRePcTca0VFYUjO9sBIwxzYd1H5lmYfbs2Rw4cICCggIOHDhgBcEYn1hRMMYYE2RFwRhjTJAVBWOMMUFWFIwxxgRZUTDGGBMkzXmZw9qIyNfAJx5+i87ALg/b95rl9080ZwfL7zev85+kqsc1tFPUFQWvicg7qtrP7xyNZfn9E83ZwfL7rbnkt+4jY4wxQVYUjDHGBFlRONwCvwMcIcvvn2jODpbfb80iv51TMMYYE2RHCsYYY4KsKDhE5HER+UpECv3OEi4ROUFECkSkRESKROTXfmcKh4gkiMjbIvKBk/8+vzM1hojEish7IvKS31nCJSLbRORDEXlfRN7xO0+4RKSDiPxdRP7j/B1c6Hcmt0TkdOf3XnXbJyK3+5bHuo8CRORioBR4UlVT/M4TDhHpBnRT1XdFpB2wHhiuqsU+R3NFRARIVNVSEWkFrAJ+raprfY4WFhGZCPQDklT1Kr/zhENEtgH9VDUqx/mLyBPAG6r6mIgcA7RR1W/9zhUuEYkFPgMGqKqX12PVyY4UHKr6OvCN3zkaQ1W/UNV3nfvfASVAd39TuacBpc7DVs4tqj6tiEgP4ErgMb+zHG1EJAm4GMgDUNWyaCwIjiHAR34VBLCi0OKISE8gFXjL3yThcbpe3ge+Al5T1ajKDzwE3AlU+h2kkRR4VUTWi8hYv8OE6RTga+DPTvfdYyKS6HeoRvo5kO9nACsKLYiItAVeAG5X1X1+5wmHqlao6rlAD6C/iERNF56IXAV8parr/c5yBAaq6nnAMGC8050aLeKA84B5qpoKfA9M8TdS+Jxurx8Dz/uZw4pCC+H0xb8APKOq//A7T2M5h/0rgCt8jhKOgcCPnX75vwKDReRpfyOFR1U/d75+BfwT6O9vorDsAHaEHF3+nUCRiDbDgHdV9Us/Q1hRaAGcE7V5QImqzvI7T7hE5DgR6eDcbw1cCvzH31TuqerdqtpDVXsSOPxfrqo3+BzLNRFJdAYo4HS7DAWiZhSequ4EPhWR051NQ4CoGGRRQwY+dx1B4LDLACKSDwwCOovIDuB3qprnbyrXBgK/AD50+uUBpqrqyz5mCkc34Aln5EUM8JyqRt2wzijWBfhn4LMFccCzqvqKv5HClgU843TBfAzc5HOesIhIG+Ay4Bbfs9iQVGOMMVWs+8gYY0yQFQVjjDFBVhSMMcYEWVEwxhgTZEXBGGNMkBUFEzEioiKSG/J4kojcG6G2V4hIP+f+NhHpHIl2jzDTP0VkeMjjjSLy25DHL4jIT5ogR7P4fZiWwYqCiaSDwE+OojeoNcBFACLSicAsu6FTNl/o7NPiiIhd49RCWVEwkVROYEnB39R8wrlq+QURWefcBjrbE521LNY5k5ld7WxvLSJ/FZENIvI3oHVt31BEbnDWYnhfRP7kXACHiJSKyHRngrd/i0h/52jjYxH5sbNPTxF5Q0TedW5Vb/CDnH2r5ud/xrlqvKbVOEXB+foScJwEnAz8oKo7ReSXIvJISOaXRGRQSM4cCawlsVZEujjbrxWRQmf76862WBGZKYF1DzaISFZIliznZ/hQRM6o5fd0ZsjvaYOI9HK2j3IefyAiTznbThKRZc72ZSJyorP9LyIyS0QKgOl1/duZKKeqdrNbRG4EPiknAduA9sAk4F7nuWeBNOf+iQSm5AD4P+AG534HYBOQCEwEHne2n02g4PRzHm8DOgPJwGKglbN9LjDKua/AMOf+P4FXCUzJfQ7wvrO9DZDg3O8FvOPcHwTsJTA5XwzwZlX2Gj9vPPAtcAzwAIH5mp4C+gDXE1ibA+
CXwCMhr3sJGBSS83+d+zOA3zr3PwS6V/1enK+3EpjfKs553DHk95Hl3L8NeKyWrLOB6537xxAosmcCG4HONdpbDNzo3B8NLHTu/8XJHlvfv53f/w/tdmQ3OwQ0EaWq+0TkSeBXwA8hT10K9An5wJ3kzLczlMBkcpOc7QkEisbFwMNOmxtEZEMt324I0BdY57TbmsDU2wBlQNVUDR8CB1X1kIh8CPR0trcCHhGRc4EKoHdI22+r6g4AZ+qQngQW/wn9WQ+KSBGBydcuIPCmfgqBo4ZU3HUdlRF4o4XA4kiXOfdXA38RkeeAqgkOLwXmq2q58/1D1//4R0gbtZ3HeBPIlsC6D/9Q1c0iMhj4uzoL64S0d2FIG085P1eV51W1wrlf179diYuf2zRTVhSMFx4C3gX+HLItBrhQVUMLRdVkfj9V1Y01tkPDC+0I8ISq3l3Lc4fU+QhLYI2DgwCqWhnSH/4b4EsCRw8xwIGQ1x8MuV8BxInIAOBPzrZpqrqIwBv/xUA7Vd0jImuBCQSKwnxn33Kqd9Um1JGzAudvUlXHOd/vSuB9p3AJdf9OqvIG2wilqs+KyFtOe0tF5OYG2qv28pD734fcr/XfzkQ3O6dgIs75xPkckBmy+VUCb5YAOG9yAEsJ9IeLsz3V2f46gS4YJLC2wtm1fKtlwAgROd7Zr6OInBRG1PbAF6paSWBCwdgGfq63VPVc57bI2byawCRmHziPNxA4ajgRKHK2bQPOFZEYETkBF9NSi8ipzvebBuwCTiDwOxxXVdREpKPbH1RETgE+VtWHgUUEfp/LgOuck+Sh7a0hMNsrBP4NVlG7uv7tTBSzomC8kkug37/Kr4B+zsnLYmCcs/33BLpxNohIofMYYB7Q1uk2uhN4u+Y30MAa1L8lsGLYBuA1AjOuujUXuNH5dN+b6p+C3VpDoMvoTSdTOYEurHecYgOBwrGVQDfWTAJHUQ35g3PSuJBAgfyAwFKf2wn8rj4ARoaR82dAodMVdgaB8x1FQA6w0mmvatr1XwE3Ob/TXwC/rqPNuv7tTBSzWVKNMcYE2ZGCMcaYICsKxhhjgqwoGGOMCbKiYIwxJsiKgjHGmCArCsYYY4KsKBhjjAmyomCMMSbo/wepJeZkDy+BeQAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"boxplot_of_distances(d, d2v, \"Original dna2vec\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"import keras\n",
"import pickle\n",
"import numpy as np\n",
"from keras.models import load_model\n",
"from keras.preprocessing.sequence import pad_sequences"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\ProgramData\\Anaconda3\\lib\\site-packages\\keras\\models.py:282: UserWarning: No training configuration found in save file: the model was *not* compiled. Compile it manually.\n",
" warnings.warn('No training configuration found in save file: '\n"
]
}
],
"source": [
"tokenizer = pickle.load(open(\"D:\\\\Computer_Science_Project\\\\Encoder-Decoder\\\\drive-download-20180530T181225Z-001\\\\tokenizer.pk\", 'rb'))\n",
"#\n",
"test_encoder_model = load_model(\"D:\\\\Computer_Science_Project\\\\Encoder-Decoder\\\\drive-download-20180530T181225Z-001\\\\encoder_model_GRU_5epochs_6000words_adam.h5\")"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"maxlen = 11"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"headlines = tokenizer.texts_to_sequences(k)\n",
"headlines = pad_sequences(headlines,maxlen=11)"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(100, 11)"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"headlines.shape"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"k100 = test_encoder_model.predict(headlines)"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"numpy.ndarray"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(k100)"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(100, 100)"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"k100.shape"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"result = list(k100)"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(100,)"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result[0].shape"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(100,)"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vectors[0].shape"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(0.1710238736087488, 8.383229952352799e-34)"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d2v = get_distances(result, lambda a,b: 1 - cosine(a,b))\n",
"compute_spearman(d,d2v)"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEKCAYAAADjDHn2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3X2YVPV9/vH3DSikGh+Q/NYHTMAG4wpajVSTymVBNME8aEyDuqlWIz7QKrEmraCoMVIaxWg0ikEUYrWKsSZGYrEEcbcpwTRgBITd2iCSuEFIomi66C4ufH5/zFkyLLs7MwuHM8Per+uai3O+52HuGWA/e77nnO9RRGBmZtaVXlkHMDOz8udiYWZmBblYmJlZQS4WZmZWkIuFmZkV5GJhZmYFuViYmVlBLhZmZlaQi4WZmRXUJ+sAu8qAAQNi0KBBqe1/06ZN7LPPPqntP23Ony3nz1Yl5087+wsvvPD7iPhAofX2mGIxaNAgli5dmtr+6+rqGDlyZGr7T5vzZ8v5s1XJ+dPOLulXxaznbigzMyvIxcLMzApysTAzs4JcLMzMrCAXCzMzKyi1YiFptqTfSlrZyXJJ+rak1ZJWSPpo3rILJf0yeV2YVkYzMytOmkcWDwJjulh+BjAkeV0GfAdAUn/ga8BJwInA1yQdmGJOMzMrILX7LCLiJ5IGdbHKWcBDkXuu688kHSDpEGAksCAi3gSQtIBc0ZmTVtZK09LSwrRp02hpadlh2ZAhQzj88MN3aF+2bBlbt27drm3z5s0sXrx4h3aAz3/+83z0ox/dod3MeqYsb8o7DHgtb74xaeusfQeSLiN3VEJVVRV1dXVFv/moUaM6bI+v7ddh+0iATnavr/+hw/ba2tqi85Sivr6eG2+8scNl8bX94NUd208F+MmO7WN6gabsmP9HP/oRd911184F7UIlf//g/PkqOX8lZ4fdnD8iUnsBg4CVnSz7d2BE3vxC4ATgH4Hr89pvAL5a6L1OOOGESFNtbW2q+99ZixcvjoMPPjh+97vfdbi8s/yXXHJJjB8/PsVku0a5f/+FOH+2Kjl/2tmBpVHEz/Msr4ZqBPL7SwYC67pot060tLQwbtw47rrrLgYMGFDStt/85jd5+umnSzoqM7OeJ8tiMRf4m+SqqI8Bb0fE68B84BOSDkxObH8iabNOTJkyhaOOOoqxY8eWvO3+++/PvffeyyWXXMI777yTQjoz2xOkeensHOB54COSGiWNkzRe0vhklXnAGmA1cD/wdwCRO7E9BViSvG5O2qwDL774IjNnzmT69OlI6tY+PvvZz3LiiSdyww037OJ0ZranSPNqqJoCywO4opNls4HZaeTak7z33ntcfPHFTJs2jUMOOWSn9vXtb3+bY445hrFjx/Kxj31sFyU0sz2F7+CuYLfddhtVVVVceOHO37c4YMAA7rzzTi6++OIOL8k1s57NxaJCNTQ08K1vfYuZM2d2u/upvXPOOYcjjzySf/qnf9ol+zOzPYeLRQXasmULF198MV//+tf54Ac/uMv2K4l7772X++67j2XLlu2y/VrlmjNnDsOGDWP06NEMGzaMOXN8b2xPtcc8Ka8n+elPf0pTUxPjx48vvHKJDj30UCZOnMidd97Jgw8+uMv3b5Vjzpw5TJ48mVmzZrFlyxZ69+7NuHHjAKip6fKUpO2BfGRRgTZv3kxVVRW9eqXz13fIIYewefPmVPZtlWPq1KnMmjWLUaNG0adPH0aNGsWsWbOYOnVq1tEsAy4WZtahhoYGRowYsV3biBEjaGhoyCiRZcnFwsw6VF1dzaJFi7ZrW7RoEdXV1Rklsiy5WJilqJJPEE+ePJlx48ZRW1tLa2srtbW1jBs3jsmTJ2cdzTLgE9xmKan0E8RtGSdMmEBDQwPV1dVMnTq1IrLbrucjCytrEyZMoF+/fowaNYp+/foxYcKErCMVbU84QVxTU8PKlStZuHAhK1
eudKHowXxkYWVrwoQJzJgxg1tvvZWjjz6a+vp6Jk6cCMDdd9+dcbrCGhoaaGxsZNiwYdt+M584caJPEFtFcrGwsnX//fdz7rnnMnv27G0/bM8991zuv//+iigWhx56KNdccw2PPvrotm6oL37xixx66KFZRzMrmbuhrGy1tLQwf/58Nm3aBMCmTZuYP39+RY1d1X4oll01NIvZ7uYjCytrLS0tPP7449t+Mz/rrLOyjlS0devWcfnll3PGGWfQ0tJC3759ufjii7nvvvuyjmZWMhcLK2vvvPMOL774IkcffTQrVqyoqAc0HXrooTz55JM888wz7oayiuduKCtrn/70p7nuuus444wzuO666/j0pz+ddaSSVHo3VCXfJ2K7lo8srGwNHDiQn//85zv8Zj5w4MCsoxVl3bp1PPjgg9vdp3Drrbdy0UUXZR2tKJV+n4jtWqkeWUgaI+llSaslTepg+YckLZS0QlKdpIF5y7ZIWpa85qaZ08rTtGnTePvttzn11FM5/fTTOfXUU3n77beZNm1a1tGKUl1dzcCBA7e7T2HgwIEVM1zGnnCfiO06aT6DuzcwHTgDOBqokXR0u9W+CTwUEccCNwPfyFv2bkQcl7zOTCunla/FixfT3NxMnz65A+A+ffrQ3NzM4sWLM05WnEofLiP/PpG2bqjGxkbfJ9JDpdkNdSKwOiLWAEh6DDgLqM9b52jg6mS6FvhhinmswsyYMYMDDzyQJ554Yls3yBe+8AVmzJhREfdZ1NTU8OCDDzJ69GgiAkmcfvrpFdOF4/tELF+axeIw4LW8+UbgpHbrLAf+CrgLOBt4v6SDIuINoJ+kpUArcEtE7FBIJF0GXAZQVVVFXV3dLv8QbZqamlLdfymWL1/Oxo0bS8pTSv76+no2bNiQ+edtbW3lmmuuQRLNzc3su+++XHPNNUyaNCnzbMW46667WLhwIePHj+fUU0/lueeeY+bMmZx99tlcddVVWccrqLm5ma1bt7Js2TIGDx7Mq6++yubNm2ltba2I7z9fOf3/LVXZZI+IVF7AWOCBvPkLgLvbrXMo8APgRXIFoxHYv21Z8ucRwFrgT7t6vxNOOCHSVFtbm+r+S7FgwYIYPXp0SduUkv+RRx6JmpqaElPtekBMmzYtIv6Yf9q0aZH7Z1v++vbtG7fffntE/DH/7bffHn379s0wVfF69eoVDz30UAwdOjR69eoVQ4cOjYceeih69eqVdbSSldP/31KlnR1YGkX8TE/zBHcjcHje/EBgXf4KEbEuIj4fEccDk5O2t9uWJX+uAeqA41PMamWof//+TJo0iTvuuIPm5mbuuOMOJk2aRP/+/bOOVpSWlhb69++/XZ9///79K+YO9Eo/QW+7VprdUEuAIZIGA78BzgO+mL+CpAHAmxGxFbgWmJ20Hwi8ExEtyTonA5VxCYztMvfccw/jx49n0qRJvPfee+y1117su+++3HPPPVlHK0qfPn346le/usM5l7YT9uWu7QR926WzbSfofTVUz5Tav9qIaJV0JTAf6A3MjohVkm4md9gzFxgJfENSAD8Brkg2rwbuk7SV3BVbt0RE/Q5vYnu0thPBU6dOpaGhgSOPPJLJkydXzAni/fbbj7feemu7O9DfeustDjjggKyjFcXPs7B8qf6KExHzgHnt2m7Mm34CeKKD7RYDx6SZzSpDTU0NNTU11NXVMXLkyKzjlOStt97i8ssv57rrrts2NtTll19eUWNDVfL3b7uWh/swS0l1dTVjx46lubmZ2tpampubGTt2rPv8rSK5WFhZq+SxiSr9pjyzfJVxps16pEofm8h9/rYn8ZGFla09YWwiP8Pa9hQuFla2GhoaGDFixHZtI0aM8NhEZhlwsbCyVV1dzaJFi7ZrW7RokU8Qm2XAxcLKlk8Qm5UPn+C2suUTxGblw8XCyppvCjMrD+6GMjOzglwszMysIBcLMzMryMXCzMwKcrEwM7OCXCzMzKwgFwszMyvIxcLMzApKtV
hIGiPpZUmrJU3qYPmHJC2UtEJSnaSBecsulPTL5HVhmjmtfFXy8yzM9iSp3cEtqTcwHTgdaASWSJrb7lna3wQeioh/kXQq8A3gAkn9ga8Bw4EAXki23ZhWXis/lf48C7M9SZpHFicCqyNiTURsBh4Dzmq3ztHAwmS6Nm/5J4EFEfFmUiAWAGNSzGplaE94noXZniLNsaEOA17Lm28ETmq3znLgr4C7gLOB90s6qJNtD2v/BpIuAy4DqKqqoq6ubldl30FTU1Oq+y/F8uXL2bhxY0l5SslfX1/Phg0bMv+8DQ0NbNmyhbq6um35t2zZQkNDQ+bZSlVO/366w/mzUy7Z0ywW6qAt2s3/A3CPpIuAnwC/AVqL3JaImAnMBBg+fHikOdBcOQ1k19rayjPPPFNSnlLyr1u3jjVr1mT+eaurq+nduzcjR47clr+2tpbq6urMs5WqnP79dIfzZ6dcsqfZDdUIHJ43PxBYl79CRKyLiM9HxPHA5KTt7WK2tT2fn2dhVj7SPLJYAgyRNJjcEcN5wBfzV5A0AHgzIrYC1wKzk0XzgX+WdGAy/4lkufUgfp6FWflIrVhERKukK8n94O8NzI6IVZJuBpZGxFxgJPANSUGuG+qKZNs3JU0hV3AAbo6IN9PKauXLz7MwKw+pPvwoIuYB89q13Zg3/QTwRCfbzuaPRxpmZpYh38FtZmYFuViYmVlBLhZmZlaQi4WZmRXkYmFmZgUVLBaSPibp/Xnz75fUftgOMzPbgxVzZPEdoClvflPSZmZmPUQxxUIRsW1cpuRu61TvzzAzs/JSTLFYI+nLkvZKXlcBa9IOZmZm5aOYYjEe+Aty4zu1DTN+WZqhzMysvBTsToqI35IbBNDMzHqoYq6GOjJ5TvbKZP5YSdenH83MzMpFMd1Q95MbHvw9gIhYgY80zMx6lGKKxZ9ExM/btbWmEcbMzMpTMcXi95L+lOSxppK+ALyeaiozMysrxdwvcQW551wfJek3wKvAX6eayszMykoxxeJXEXGapH2AXhHxf2mHMjOz8lJMN9SrkmYCH2P7YT8KkjRG0suSVkua1MHyD0qqlfSipBWSPpW0D5L0rqRlyWtGKe9rZma7VjHF4iPAs+S6o16VdI+kEYU2ktQbmA6cARwN1Eg6ut1q1wOPR8Tx5K6wujdv2SsRcVzyGl9ETjMzS0nBYhER70bE4xHxeeB4YD/gP4vY94nA6ohYExGbgceAs9rvPtkfwP7AuqKTm5nZblPUgICS/hI4l9xRwhLgnCI2Owx4LW++baiQfDcBP5Y0AdgHOC1v2WBJLwJ/AK6PiP/qINdlJEOPVFVVUVdXV8zH6ZampqZU91+K5cuXs3HjxpLylJK/vr6eDRs2lM3nhfL6/rvD+bNVyfnLJntEdPkid/XTk0ANsE+h9fO2Gws8kDd/AXB3u3W+Anw1mf44UE/uaKcvcFDSfgK5orNfV+93wgknRJpqa2tT3X8pFixYEKNHjy5pm1LyP/LII1FTU1NiqnSV0/ffHc6frUrOn3Z2YGkU8TO9mCOLP4uIP3SjDjUCh+fND2THbqZxwBiAiHheUj9gQOTGo2pJ2l+Q9ApwJLC0GznMzGwndXnOQtIngbGSPtSu/eIi9r0EGCJpsKS9yZ3AnttunV8Do5N9VgP9gN9J+kByghxJRwBD8LDoZmaZ6bRYSPpnYDJwDPBccl6hzZWFdhwRrcl684EGclc9rZJ0s6Qzk9W+ClwqaTkwB7goOSw6BViRtD8BjI+IN0v/eGZmtit01Q31WeD4iGiVdBPwqKQjIuJqQMXsPCLmAfPatd2YN10PnNzBdt8Hvl/Me5iZWfq66obqkxwdEBFvkSse+0n6N2Dv3RHOzMzKQ1fF4pXkklkAImJLRIwDXgaqU09mZmZlo6tiMRZoPzQ5EXE921/lZGZme7hOz1lExLtt05IOJHdFUr+8VX6TYi4zMysjBe+zkHQJcBW5+ySWkRtQ8Hng1HSjmZlZuShmIM
GrgD8nN1T5KHLjQ/0u1VRmZlZWiikWzRHRDCCpb0T8D7mRaM3MrIcoZriPRkkHAD8EFkjaiEeHNTPrUQoWi4g4O5m8SVItuaHE/yPVVGZmVlaKHaK8N1BFbgRagIPJjetkZmY9QDFXQ00AvgZsALYmzQEcm2IuMzMrI8UcWVwFfCQi3kg7jJmZladiroZ6DXg77SBmZla+ijmyWAPUSfp3kgcSAUTEHamlMjOzslJMsfh18tobjzZrZtYjFXPp7Nd3RxAzMytfnRYLST8id9VThyLizM6WmZnZnqWrE9zfBG4nd2/Fu8D9yasJWFnMziWNkfSypNWSJnWw/IOSaiW9KGmFpE/lLbs22e7l5FngZmaWka6GKP9PAElTIuKUvEU/kvSTQjtObuSbDpwONAJLJM1NHqXa5npyz+b+jqSjyT2CdVAyfR4wFDgUeFbSkRGxpcTPZ2Zmu0Axl85+QNIRbTOSBgMfKGK7E4HVEbEmIjYDjwFntVsngP2S6f3545hTZwGPRURLRLwKrE72Z2ZmGSjmaqiryV06uyaZHwRcVsR2h5G7R6NNI3BSu3VuAn6c3CW+D3Ba3rY/a7ftYe3fQNJlbVmqqqqoq6srIlb3NDU1pbr/UixfvpyNGzeWlKeU/PX19WzYsKFsPi+U1/ffHc6frUrOXy7Zi7ka6j8kDQGOSpr+JyJautomoY52126+BngwIm6X9HHgYUnDityWiJgJzAQYPnx4jBw5sohY3VNXV0ea+y9Fa2srzzzzTEl5Ssm/bt061qxZUzafF8rr++8O589WJecvl+xFDSSYFIflJe67ke2f1T2QHYc2HweMSd7jeUn9gAFFbmtmZrtJMecsumsJMETSYEl7kzthPbfdOr8GRgNIqib3jO/fJeudJ6lvco5kCPDzFLOamVkXijqy6I6IaJV0JTAf6A3MjohVkm4GlkbEXOCrwP2SribXzXRRRASwStLjQD3QClzhK6HMzLJTzBDlJwPLImKTpPOBjwJ3RcSvCm0bEfPIXQ6b33Zj3nQ9cHIn204FphZ6DzMzS18x3VDfAd6R9GfANcCvgIdSTWVmZmWlmGLRmnQNnUXuiOIu4P3pxjIzs3JSzDmL/5N0LXA+cEpyZ/Ze6cYyM7NyUsyRxbnknmMxLiLWk7s57rZUU5mZWVkp5qa89cAdefO/xucszMx6lIJHFpI+JmmJpCZJmyVtkeTHrJqZ9SDFdEPdQ25Yjl8C7wMuITearJmZ9RDFDvexWlLv5Ma470panHIuMzMrI8UUi3eS4TqWSZoGvE5uhFgzM+shiumGuiBZ70pgE7kB/v4qzVBmZlZeujyySO6pmBoR5wPNwNd3SyozMysrXR5ZJOcoPpB0Q5mZWQ9VzDmLtcBPJc0l1w0FQETc0ekWZma2RymmWKxLXr3wmFBmZj1SMXdwfx1A0j4RsanQ+mZmtucp5g7uj0uqBxqS+T+TdG/qyczMrGwUc+nsncAngTcAImI5cEqaoczMrLwU9QzuiHitXVNRjziVNEbSy5JWS5rUwfJvSVqWvP5X0lt5y7bkLWv/7G4zM9uNijnB/ZqkvwAiuYT2yyRdUl1J7tGYDpwONAJLJM1NHqUKQERcnbf+BOD4vF28GxHHFfcxzMwsTcUcWYwHriD3HItG4Djg74rY7kRgdUSsiYjNwGPknrbXmRpgThH7NTOz3azTIwtJAyOiMSJ+D/x1u2WfBX5UYN+HAfndV43ASZ2814eAwcBzec39JC0FWoFbIuKHHWx3GXAZQFVVFXV1dQUidV9TU1Oq+y/F8uXL2bhxY0l5SslfX1/Phg0byubzQnl9/93h/Nmq5Pxlkz0iOnwBLwODOmj/EvBKZ9vlrTcWeCBv/gLg7k7Wndh+GXBo8ucR5G4M/NOu3u+EE06INNXW1qa6/1IsWLAgRo8eXdI2peR/5JFHoqampsRU6Sqn7787nD9blZw/7ezA0ijw8zwiuuyGuhpYIGlIW0PyLO6vAH9ZRB
1qJDfoYJuB5G7u68h5tOuCioh1yZ9rgDq2P59hZma7UafFIiLmkTtf8YykYZLuBD4DnBIRjUXsewkwRNLg5MT4ecAOVzVJ+ghwIPB8XtuBkvom0wOAk4H69tuamdnu0eXVUBGxUNJF5H6zXwyMjojmYnYcEa2SrgTmA72B2RGxStLN5A572gpHDfBYcjjUphq4T9JWcgXtlsi7isrMzHavrk5w/x8QgIC+wGjgt5IERETsV2jnydHJvHZtN7abv6mD7RYDxxSR38zMdoNOi0VEeNBAMzMDiryD28zMejYXCzMzK8jFwszMCnKxMDOzglwszMysIBcLMzMryMXCzMwKcrEwM7OCXCzMzKwgFwszMyvIxcLMzApysTAzs4JcLMzMrCAXCzMzK8jFwszMCnKxMDOzglItFpLGSHpZ0mpJkzpY/i1Jy5LX/0p6K2/ZhZJ+mbwuTDOnmZl1rctncO8MSb2B6cDpQCOwRNLc/GdpR8TVeetPAI5PpvsDXwOGk3u06wvJthvTymtmZp1L88jiRGB1RKyJiM3AY8BZXaxfA8xJpj8JLIiIN5MCsQAYk2JWMzPrQmpHFsBhwGt5843ASR2tKOlDwGDguS62PayD7S4DLgOoqqqirq5up0N3pqmpKdX9l2L58uVs3LixpDyl5K+vr2fDhg1l83mhvL7/7nD+bFVy/nLJnmaxUAdt0cm65wFPRMSWUraNiJnATIDhw4fHyJEjuxGzOHV1daS5/1K0trbyzDPPlJSnlPzr1q1jzZo1ZfN5oby+/+5w/mxVcv5yyZ5mN1QjcHje/EBgXSfrnscfu6BK3dbMzFKWZrFYAgyRNFjS3uQKwtz2K0n6CHAg8Hxe83zgE5IOlHQg8ImkzczMMpBaN1REtEq6ktwP+d7A7IhYJelmYGlEtBWOGuCxiIi8bd+UNIVcwQG4OSLeTCurmZl1Lc1zFkTEPGBeu7Yb283f1Mm2s4HZqYUzM7Oi+Q5uMzMryMXCzMwKcrEwM7OCXCzMzKwgFwszMyvIxcLMzApysTAzs4JcLMzMrCAXCzMzK8jFwszMCkp1uA+zndWvXz9aWlq2zfft25fm5uYME5VG2nG0/bxh0Mpepec/9thjeemll7bNH3PMMaxYsSLDRJXLRxZWttoKRVVVFd/97nepqqqipaWFfv36ZR2tKPk/aEeMGNFheznLz3nOOed02F7O2grFmWeeyZNPPsmZZ57JSy+9xLHHHpt1tIrkYmFlq61QrF+/nkGDBrF+/fptBaOSRARTpkypqN/I80UEf/u3f1tx+dsKxVNPPcUBBxzAU089ta1gWOlcLKystX+cZDk8XrIU5557bpfz5W7ixIldzpe7WbNmdTlvxfM5Cytr1dXVWUfYKd/73vf43ve+l3WMbrv11lu59dZbs47RbX/+53/O66+/TktLC3379uWQQw7JOlLF8pGFVYTzzz8/6wg7Zf/99886wk4ZNmxY1hFK1r9/f9auXcuHP/xh5syZw4c//GHWrl1L//79s45WkXxkYRXhX//1X7OOsFPefvvtrCPslJUrV2YdoWSbNm3i4IMPZtWqVdTU1ABw8MEHs3HjxoyTVaZUjywkjZH0sqTVkiZ1ss45kuolrZL0aF77FknLktcOz+62nmHGjBlEBLW1tUQEM2bMyDpSSS699NLt8l966aVZRyrJ9OnTt8s/ffr0rCMVraWlhVdeeWW7/K+88krFXSBRLlIrFpJ6A9OBM4CjgRpJR7dbZwhwLXByRAwF/j5v8bsRcVzyOjOtnFbexo8f3+V8ubv//vu7nC93V1xxRZfz5axv3747/HIxY8YM+vbtm1GiypbmkcWJwOqIWBMRm4HHgLParXMpMD0iNgJExG9TzGMVShJPPfVUxVzf354kbrvttorO/4Mf/KDi8l966aVMnDiRO+64g+bmZu644w4mTpxYcUd35SLNcxaHAa/lzTcCJ7Vb50gAST8FegM3RcR/JMv6SVoKtAK3RMQPU8
xqZSgitv2AuvPOO7drrwT5+efNm7ddeyXIz3/33Xdv114J2jJfd911266GGj9+/HafxYqntP7iJY0FPhkRlyTzFwAnRsSEvHWeBt4DzgEGAv8FDIuItyQdGhHrJB0BPAeMjohX2r3HZcBlAFVVVSc89thjqXwWgKamJvbdd9/U9l+KF154gUcffZTbb7+96G1Kyf/ss8/y/PPPc8MNN3Q34i5XTt9/dzh/tio5f9rZR40a9UJEDC+0XppHFo3A4XnzA4F1Hazzs4h4D3hV0svAEGBJRKwDiIg1kuqA44HtikVEzARmAgwfPjxGjhyZwsfIqaurI839l6K1tZVnnnmmpDyl5F+3bh1r1qwpi89b6WMTOX+29t13XzZt2rRtfp999qGpqSnDRMWbM2cOU6dOpaGhgerqaiZPnrztqq4spHnOYgkwRNJgSXsD5wHtr2r6ITAKQNIAct1SayQdKKlvXvvJQH2KWa0MddZHXil9586frbZCMWjQIB5++GEGDRrEpk2bKuIIY86cOUyePJm7776b+fPnc/fddzN58mTmzJmTWabUikVEtAJXAvOBBuDxiFgl6WZJbVc3zQfekFQP1AL/GBFvANXAUknLk/ZbIsLFwsyK1nZEsXbtWi644ALWrl27XXs5mzp1KrNmzWLUqFH06dOHUaNGMWvWLKZOnZpZplRvyouIecC8dm035k0H8JXklb/OYuCYNLNVsuXLl7Nw4UI+97nPFb3N73//ewYMGFDUuq+99hpHHXVUd+PtcpL4xje+wbXXXltRXSD5TjvtNJ599tmsY3TbJZdcwgMPPJB1jG6pqqrilltuYdKkSWzYsCHrOEVpaGjYbqRiyI1c3NDQkFEi38FdkcaMGcP69es5+eSTi97m7LPP5sknnyx6/XIa3mHr1q3U1dWxdevWiukCyRcR1NXVsWDBgorOf//991dcfkmsX7+euro61q9fT69evSriF47q6moWLVrEqFGjtrUtWrQo07HSXCwq0NChQ7nttttK3q6UI5FyIonPfOYz2/3HqSSSOOSQQ3j99dezjtItkjj22GMr8qFBEcHgwYOZMmUKgwcProhCATB58mTGjRvHrFmz2LJlC7W1tYwbN27P7YYy21WefvoKW8NiAAAKD0lEQVTprCPslEotFG0qsVC0aTtnUUnarnqaMGHCtquhpk6dusdeDWVmlpm2YT3yn7SY317uampqWLlyJQsXLmTlypWZFgrwkYWVsfw7iNu3VwLnz1ZzczP9+vVjw4YNfOlLXwIq7xnu5cRHFlbWImK7UUMr5QdVG+fPVnNz83b5XSi6z8XCzMwKcjeUlbVK7gaBys9/0EEH8eabb26b79+/P2+88UaGiSwrPrKwspX/g/bMM8/ssL2c5ec89dRTO2wvZ22FYujQocyZM4ehQ4fy5ptvctBBB2UdzTLgYmFlLyK4+uqrK+o38nwRwQ033FBx+dsKxcqVKzn44INZuXLltoJhPY+LxR5GUoevQsvKVSU/qQ3Y4fr+SrveP/85HB3NW8/hYrGHabtipf0r/2qW9q9y1v6Zz5X0DGiAhx9+uMv5cvepT32qy3nrOVwsrOxJ4lvf+lbZHwV1RhJTpkypuPz9+/dn1apVDBs2jPXr1zNs2DBWrVpF//79s45mGfDVUFa28m8Kmzt37nbtlSA//3PPPbddeyV44403OOigg1i1atW2u4d9NVTP5SMLK2uVflNYped/4403tsvvQtFzuViYmVlBLhZmZlaQi4WZmRXkYmFmZgW5WJiZWUGqtKszOiPpd8CvUnyLAcDvU9x/2pw/W86frUrOn3b2D0XEBwqttMcUi7RJWhoRw7PO0V3Ony3nz1Yl5y+X7O6GMjOzglwszMysIBeL4s3MOsBOcv5sOX+2Kjl/WWT3OQszMyvIRxZmZlaQi0UBkmZL+q2klVln6Q5Jh0uqldQgaZWkq7LOVApJ/ST9XNLyJP/Xs85UKkm9Jb0o6emss5RK0lpJL0
laJmlp1nlKJekASU9I+p/k/8DHs85ULEkfSb73ttcfJP19ZnncDdU1SacATcBDETEs6zylknQIcEhE/ELS+4EXgM9FRH3G0Yqi3Bjf+0REk6S9gEXAVRHxs4yjFU3SV4DhwH4R8Zms85RC0lpgeERU5D0Kkv4F+K+IeEDS3sCfRMRbWecqlaTewG+AkyIizfvJOuUjiwIi4idAxT50OCJej4hfJNP/BzQAh2WbqniR05TM7pW8KuY3HEkDgU8DD2SdpaeRtB9wCjALICI2V2KhSIwGXsmqUICLRY8iaRBwPPDf2SYpTdKNswz4LbAgIiop/53ANcDWrIN0UwA/lvSCpMuyDlOiI4DfAd9NugEfkLRP1qG66TxgTpYBXCx6CEn7At8H/j4i/pB1nlJExJaIOA4YCJwoqSK6AyV9BvhtRLyQdZadcHJEfBQ4A7gi6ZatFH2AjwLfiYjjgU3ApGwjlS7pPjsT+Lcsc7hY9ABJX//3gUci4gdZ5+mupAuhDhiTcZRinQycmfT7PwacKulfs41UmohYl/z5W+BJ4MRsE5WkEWjMOxJ9glzxqDRnAL+IiA1ZhnCx2MMlJ4hnAQ0RcUfWeUol6QOSDkim3wecBvxPtqmKExHXRsTAiBhErhvhuYg4P+NYRZO0T3JRBEn3zSeAirkqMCLWA69J+kjSNBqoiAs72qkh4y4oyB2mWRckzQFGAgMkNQJfi4hZ2aYqycnABcBLSb8/wHURMS/DTKU4BPiX5GqQXsDjEVFxl6BWqCrgydzvG/QBHo2I/8g2UskmAI8kXTlrgC9lnKckkv4EOB24PPMsvnTWzMwKcTeUmZkV5GJhZmYFuViYmVlBLhZmZlaQi4WZmRXkYmGpkxSSbs+b/wdJN+2ifddJGp5Mr5U0YFfsdyczPSnpc3nzL0u6Pm/++5I+vxtylMX3YXsGFwvbHVqAz/egH1yLgb8AkHQQuVGL84fG/niyzh5Hku/d2kO5WNju0Eru0ZBXt1+Q3KH9fUlLktfJSfs+ybNEliSDwJ2VtL9P0mOSVkj6HvC+jt5Q0vnJczCWSbovuakPSU2Sbk0GxntW0onJ0ckaSWcm6wyS9F+SfpG82n7wj0zWbXs+wiPJHfLt/ZSkWCR/Pg18QDmDgXcjYr2kiyTdk5f5aUkj83JOVe45Hj+TVJW0j5W0Mmn/SdLWW9I3lXvuxApJE/KyTEg+w0uSjurgexqa9z2tkDQkaf+bZH65pIeTtg9JWpi0L5T0waT9QUl3SKoFbu3s784qXET45VeqL3K/We8HrAX2B/4BuClZ9igwIpn+ILlhSQD+GTg/mT4A+F9gH+ArwOyk/VhyhWh4Mr8WGABUAz8C9kra7wX+JpkO4Ixk+kngx+SGPf8zYFnS/idAv2R6CLA0mR4JvE1uQMNewPNt2dt93r7AW8DewDfIjWX1MHA08Nfkno0CcBFwT952TwMj83J+NpmeBlyfTL8EHNb2vSR//i25sb/6JPP9876PCcn03wEPdJD1buCvk+m9yRXfocDLwIB2+/sRcGEyfTHww2T6wSR7767+7rL+d+jXzr18yGi7RUT8QdJDwJeBd/MWnQYcnfcL+n7JeESfIDcI3z8k7f3IFZNTgG8n+1whaUUHbzcaOAFYkuz3feSGNwfYDLQNWfES0BIR70l6CRiUtO8F3CPpOGALcGTevn8eEY0AyfApg8g9kCn/s7ZIWkVu0LqPkfthfwS5o4zjKa4LajO5H8CQe2DV6cn0T4EHJT0OtA0KeRowIyJak/fPf/7KD/L20dF5kueByco9d+MHEfFLSacCT0TywKO8/X08bx8PJ5+rzb9FxJZkurO/u4YiPreVKRcL253uBH4BfDevrRfw8YjILyBtAyD+VUS83K4dCj/8SMC/RMS1HSx7L5Jfeck9Y6IFICK25vW3Xw1sIHe00Qtoztu+JW96C9BH0knAfUnbjRExl1xBOAV4f0RslPQz4EpyxWJGsm4r23cF9+sk5x
aS/6sRMT55v08Dy5KCJjr/TtrybttHvoh4VNJ/J/ubL+mSAvvbbvO86U150x3+3Vll8zkL222S31AfB8blNf+Y3A9RAJIffgDzyfW3K2k/Pmn/CbmuHJR7rsWxHbzVQuALkv5fsl5/SR8qIer+wOsRsZXcIIy9C3yu/46I45LX3KT5p+QGf1uezK8gd5TxQWBV0rYWOE5SL0mHU8Tw35L+NHm/G4HfA4eT+w7HtxU7Sf2L/aCSjgDWRMS3gbnkvs+FwDnJyfn8/S0mN3ou5P4OFtGxzv7urIK5WNjudju58wptvgwMT06a1gPjk/Yp5LqDVkhamcwDfAfYN+l+ugb4efs3iNzzxa8n94S3FcACcqPXFute4MLkaOBItv+tuViLyXU9PZ9kaiXXFbY0KUKQKyivkusO+ya5o65CbktOVq8kVziXk3tk66/JfVfLgS+WkPNcYGXSpXYUufMpq4CpwH8m+2sb2v7LwJeS7/QC4KpO9tnZ351VMI86a2ZmBfnIwszMCnKxMDOzglwszMysIBcLMzMryMXCzMwKcrEwM7OCXCzMzKwgFwszMyvo/wPFuiOkn7iOlwAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"boxplot_of_distances(d, d2v, \"Keras dna2vec\")"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [],
"source": [
"# Embed the 4-mer \"AAAA\": tokenise it, pad the sequence to length 11, run the encoder.\n",
"headlines = pad_sequences(tokenizer.texts_to_sequences([\"AAAA\"]), maxlen=11)\n",
"aaaa = test_encoder_model.predict(headlines)"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.03107011318206787"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Embed the 3-mer \"AAA\": tokenise it, pad the sequence to length 11, run the encoder.\n",
"headlines = tokenizer.texts_to_sequences([\"AAA\"])\n",
"headlines = pad_sequences(headlines, maxlen=11)\n",
"aaa = test_encoder_model.predict(headlines)\n",
"\n",
"# scipy's cosine() is specified for 1-D vectors, while the encoder outputs are\n",
"# 2-D single-row batches; flatten both so the distance does not depend on\n",
"# SciPy implicitly squeezing its inputs.\n",
"cosine(aaa.ravel(), aaaa.ravel())"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics.pairwise import cosine_similarity"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.9689299]], dtype=float32)"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cosine_similarity(aaa, aaaa)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment