Skip to content

Instantly share code, notes, and snippets.

@xccds
Created December 29, 2014 03:46
Show Gist options
  • Select an option

  • Save xccds/6276408bfa984f545342 to your computer and use it in GitHub Desktop.

Select an option

Save xccds/6276408bfa984f545342 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "",
"signature": "sha256:293d70338edf11bc811a89f342478cef9a5147db929144e5d8561424c8999d83"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### \u7528gensim\u5305\u505aLSA\u548cLDA\n",
"- \u5efa\u7acb\u8bed\u6599\n",
"- \u8f6c\u6210tfidf\n",
"- \u5efa\u7acbLSA\uff08\u6f5c\u5728\u8bed\u4e49\u5206\u6790\uff09\n",
"- \u67e5\u8be2\u76f8\u4f3c\u6587\u6863\n",
"- \u5efa\u7acbLDA \uff08\u4e3b\u9898\u6a21\u578b\uff09"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- \u5efa\u7acb\u8bed\u6599"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Build the toy corpus: nine investing-book titles, one document per title.\n",
"from gensim import corpora, models, similarities\n",
"\n",
"documents = [\n",
"    \"The Neatest Little Guide to Stock Market Investing\",\n",
"    \"Investing For Dummies, 4th Edition\",\n",
"    \"The Little Book of Common Sense Investing: The Only Way to Guarantee Your Fair Share of Stock Market Returns\",\n",
"    \"The Little Book of Value Investing\",\n",
"    \"Value Investing: From Graham to Buffett and Beyond\",\n",
"    \"Rich Dad's Guide to Investing: What the Rich Invest in, That the Poor and the Middle Class Do Not!\",\n",
"    \"Investing in Real Estate, 5th Edition\",\n",
"    \"Stock Investing For Dummies\",\n",
"    \"Rich Dad's Advisors: The ABC's of Real Estate Investing: The Secrets of Finding Hidden Profits Most Investors Miss\",\n",
"]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 72
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Tokenise each title (lower-case, split on whitespace) and drop English stop words.\n",
"from nltk.corpus import stopwords\n",
"\n",
"# Build the stop-word set once: inside the comprehension the call would be\n",
"# re-evaluated for every token and the resulting list searched linearly.\n",
"stop_words = set(stopwords.words(\"english\"))\n",
"texts = [[word for word in document.lower().split() if word not in stop_words]\n",
"         for document in documents]\n",
"# NOTE: splitting on whitespace leaves punctuation attached, so 'investing' and\n",
"# 'investing:' are kept as two different tokens.\n",
"print(texts)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[['neatest', 'little', 'guide', 'stock', 'market', 'investing'], ['investing', 'dummies,', '4th', 'edition'], ['little', 'book', 'common', 'sense', 'investing:', 'way', 'guarantee', 'fair', 'share', 'stock', 'market', 'returns'], ['little', 'book', 'value', 'investing'], ['value', 'investing:', 'graham', 'buffett', 'beyond'], ['rich', \"dad's\", 'guide', 'investing:', 'rich', 'invest', 'in,', 'poor', 'middle', 'class', 'not!'], ['investing', 'real', 'estate,', '5th', 'edition'], ['stock', 'investing', 'dummies'], ['rich', \"dad's\", 'advisors:', \"abc's\", 'real', 'estate', 'investing:', 'secrets', 'finding', 'hidden', 'profits', 'investors', 'miss']]\n"
]
}
],
"prompt_number": 75
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Build the token -> integer id mapping (the dictionary) from the tokenised texts.\n",
"dictionary = corpora.Dictionary(texts)\n",
"# Parenthesised print works on both Python 2 and Python 3.\n",
"print(dictionary.token2id)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"{u'real': 32, u'estate,': 31, u'fair': 11, u'share': 16, u'edition': 8, u'investing:': 13, u'investors': 39, u'sense': 15, u'beyond': 19, u'graham': 21, u'market': 3, u'guarantee': 12, u'little': 2, u'estate': 36, u'investing': 1, u'miss': 40, u'5th': 30, u'buffett': 20, u'returns': 14, u'book': 9, u'way': 17, u'finding': 37, u'hidden': 38, u'dummies': 33, u'stock': 5, u'poor': 28, u'rich': 29, u'in,': 24, u'4th': 6, u'class': 22, u\"abc's\": 34, u'middle': 26, u'secrets': 42, u'invest': 25, u'dummies,': 7, u'value': 18, u'not!': 27, u'common': 10, u'neatest': 4, u'advisors:': 35, u\"dad's\": 23, u'guide': 0, u'profits': 41}\n"
]
}
],
"prompt_number": 78
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Convert a new document to bag-of-words form: (token id, count) pairs that use\n",
"# the ids of the existing dictionary.\n",
"# In the recorded run token 1 (investing) and token 9 (book) each occur once.\n",
"new_doc = \"Investing book\"\n",
"new_vec = dictionary.doc2bow(new_doc.lower().split())\n",
"print(new_vec)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[(1, 1), (9, 1)]\n"
]
}
],
"prompt_number": 80
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Convert every document to the same bag-of-words form; the result is the corpus.\n",
"corpus = [dictionary.doc2bow(text) for text in texts]\n",
"print(corpus)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[[(0, 1), (1, 1), (2, 1), (3, 1), (4, 1), (5, 1)], [(1, 1), (6, 1), (7, 1), (8, 1)], [(2, 1), (3, 1), (5, 1), (9, 1), (10, 1), (11, 1), (12, 1), (13, 1), (14, 1), (15, 1), (16, 1), (17, 1)], [(1, 1), (2, 1), (9, 1), (18, 1)], [(13, 1), (18, 1), (19, 1), (20, 1), (21, 1)], [(0, 1), (13, 1), (22, 1), (23, 1), (24, 1), (25, 1), (26, 1), (27, 1), (28, 1), (29, 2)], [(1, 1), (8, 1), (30, 1), (31, 1), (32, 1)], [(1, 1), (5, 1), (33, 1)], [(13, 1), (23, 1), (29, 1), (32, 1), (34, 1), (35, 1), (36, 1), (37, 1), (38, 1), (39, 1), (40, 1), (41, 1), (42, 1)]]\n"
]
}
],
"prompt_number": 81
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# gensim corpora and dense numpy matrices can be converted into each other;\n",
"# the conversion needs the number of features (terms) to size the matrix.\n",
"from gensim import matutils\n",
"\n",
"# Take the vocabulary size from the dictionary instead of hard-coding 43.\n",
"numpy_matrix = matutils.corpus2dense(corpus, num_terms=len(dictionary))\n",
"# Reverse direction: corpus = matutils.Dense2Corpus(numpy_matrix)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 87
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Classic term-document matrix (rows = terms, columns = documents); the values\n",
"# are term counts. Show the first ten term rows only.\n",
"numpy_matrix[:10]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 89,
"text": [
"array([[ 1., 0., 0., 0., 0., 1., 0., 0., 0.],\n",
" [ 1., 1., 0., 1., 0., 0., 1., 1., 0.],\n",
" [ 1., 0., 1., 1., 0., 0., 0., 0., 0.],\n",
" [ 1., 0., 1., 0., 0., 0., 0., 0., 0.],\n",
" [ 1., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
" [ 1., 0., 1., 0., 0., 0., 0., 1., 0.],\n",
" [ 0., 1., 0., 0., 0., 0., 0., 0., 0.],\n",
" [ 0., 1., 0., 0., 0., 0., 0., 0., 0.],\n",
" [ 0., 1., 0., 0., 0., 0., 1., 0., 0.],\n",
" [ 0., 0., 1., 1., 0., 0., 0., 0., 0.]], dtype=float32)"
]
}
],
"prompt_number": 89
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- \u8f6c\u6210tfidf"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Fit a TF-IDF model on the bag-of-words corpus.\n",
"tfidf = models.TfidfModel(corpus=corpus)\n",
"# Apply the model to the corpus to get the TF-IDF weighted documents.\n",
"corpus_tfidf = tfidf[corpus]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 92
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Inspect the TF-IDF weighted documents, one per line.\n",
"for doc in corpus_tfidf:\n",
"    print(doc)\n",
"# The weighted corpus can be turned into a dense matrix as well:\n",
"# numpy_matrix = matutils.corpus2dense(corpus_tfidf, num_terms=len(dictionary))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[(0, 0.432183228105567), (1, 0.16889525686595316), (2, 0.315676444823041), (3, 0.432183228105567), (4, 0.631352889646082), (5, 0.315676444823041)]\n",
"[(1, 0.1678477239832813), (6, 0.6274370726791256), (7, 0.6274370726791256), (8, 0.42950271385560695)]\n",
"[(2, 0.17076298056699674), (3, 0.23378651588573063), (5, 0.17076298056699674), (9, 0.23378651588573063), (10, 0.34152596113399347), (11, 0.34152596113399347), (12, 0.34152596113399347), (13, 0.1260470706374678), (14, 0.34152596113399347), (15, 0.34152596113399347), (16, 0.34152596113399347), (17, 0.34152596113399347)]\n",
"[(1, 0.2384389845229864), (2, 0.44565828749810027), (9, 0.6101375014879464), (18, 0.6101375014879464)]\n",
"[(13, 0.1943875188252588), (18, 0.3605413479900455), (19, 0.5266951771548322), (20, 0.5266951771548322), (21, 0.5266951771548322)]\n",
"[(0, 0.22884371488266889), (13, 0.12338213684169494), (22, 0.3343052929236428), (23, 0.22884371488266889), (24, 0.3343052929236428), (25, 0.3343052929236428), (26, 0.3343052929236428), (27, 0.3343052929236428), (28, 0.3343052929236428), (29, 0.45768742976533777)]\n",
"[(1, 0.15422435074989552), (8, 0.39464209354603486), (30, 0.5765110951399715), (31, 0.5765110951399715), (32, 0.39464209354603486)]\n",
"[(1, 0.2327026293256009), (5, 0.43493665890677735), (33, 0.8698733178135547)]\n",
"[(13, 0.11367055621369232), (23, 0.21083110588444598), (29, 0.21083110588444598), (32, 0.21083110588444598), (34, 0.30799165555519964), (35, 0.30799165555519964), (36, 0.30799165555519964), (37, 0.30799165555519964), (38, 0.30799165555519964), (39, 0.30799165555519964), (40, 0.30799165555519964), (41, 0.30799165555519964), (42, 0.30799165555519964)]\n"
]
}
],
"prompt_number": 93
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- LSA \u6f5c\u5728\u8bed\u4e49\u5206\u6790"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Fit an LSA/LSI model with two topics on the TF-IDF corpus, i.e. a\n",
"# two-dimensional latent semantic space.\n",
"lsi = models.LsiModel(corpus=corpus_tfidf, id2word=dictionary, num_topics=2)\n",
"# Project the TF-IDF corpus into that two-dimensional latent space.\n",
"corpus_lsi = lsi[corpus_tfidf]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 94
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Coordinates of each document on the two LSI topics.\n",
"for doc in corpus_lsi:\n",
"    print(doc)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[(0, 0.63347241325167303), (1, -0.061896725129924259)]\n",
"[(0, 0.17868461025638538), (1, 0.59723455114094137)]\n",
"[(0, 0.59206256115968325), (1, -0.22058707251542273)]\n",
"[(0, 0.65195370541457509), (1, -0.19985332890364949)]\n",
"[(0, 0.30911487606210863), (1, -0.15735262094227312)]\n",
"[(0, 0.18203461585802136), (1, 0.26529717841569633)]\n",
"[(0, 0.18486893062294102), (1, 0.68829562498621732)]\n",
"[(0, 0.38579940702314952), (1, 0.045443408403883161)]\n",
"[(0, 0.1118111644377808), (1, 0.43788078154125371)]\n"
]
}
],
"prompt_number": 95
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pandas as pd\n",
"\n",
"# Collect each document's coordinates on the two LSI topics for plotting.\n",
"# Look the coordinates up by topic id rather than by position: gensim returns\n",
"# sparse (topic_id, value) pairs and may omit near-zero values, in which case\n",
"# doc[0] / doc[1] would raise IndexError or pick up the wrong topic.\n",
"lsi_coords = [dict(doc) for doc in corpus_lsi]  # single pass over the transformed corpus\n",
"x1 = [coords.get(0, 0.0) for coords in lsi_coords]\n",
"x2 = [coords.get(1, 0.0) for coords in lsi_coords]\n",
"# Document numbers, used as point labels; derived from the data instead of a hard-coded 9.\n",
"names = list(range(len(lsi_coords)))\n",
"df = pd.DataFrame({'x1': x1, 'x2': x2, 'doc': names})\n",
"df"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>doc</th>\n",
" <th>x1</th>\n",
" <th>x2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 0</td>\n",
" <td> 0.633472</td>\n",
" <td>-0.061897</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 1</td>\n",
" <td> 0.178685</td>\n",
" <td> 0.597235</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 2</td>\n",
" <td> 0.592063</td>\n",
" <td>-0.220587</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td> 3</td>\n",
" <td> 0.651954</td>\n",
" <td>-0.199853</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td> 4</td>\n",
" <td> 0.309115</td>\n",
" <td>-0.157353</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td> 5</td>\n",
" <td> 0.182035</td>\n",
" <td> 0.265297</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td> 6</td>\n",
" <td> 0.184869</td>\n",
" <td> 0.688296</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td> 7</td>\n",
" <td> 0.385799</td>\n",
" <td> 0.045443</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td> 8</td>\n",
" <td> 0.111811</td>\n",
" <td> 0.437881</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 162,
"text": [
" doc x1 x2\n",
"0 0 0.633472 -0.061897\n",
"1 1 0.178685 0.597235\n",
"2 2 0.592063 -0.220587\n",
"3 3 0.651954 -0.199853\n",
"4 4 0.309115 -0.157353\n",
"5 5 0.182035 0.265297\n",
"6 6 0.184869 0.688296\n",
"7 7 0.385799 0.045443\n",
"8 8 0.111811 0.437881"
]
}
],
"prompt_number": 162
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- 9\u4e2a\u6587\u6863\u5728\u4e8c\u7ef4\u8bed\u4e49\u7a7a\u95f4\u4e2d\u7684\u4f4d\u7f6e"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Scatter plot of the documents in the two-dimensional LSI space, labelled by document number.\n",
"# Documents 0, 2 and 3 lie close together, i.e. they are highly similar.\n",
"%matplotlib inline\n",
"# Import only the names used here instead of `from ggplot import *`, which pulls\n",
"# every public ggplot name into the notebook namespace.\n",
"from ggplot import ggplot, aes, geom_point, geom_text\n",
"\n",
"p = ggplot(df, aes(x='x1', y='x2', label='doc')) + geom_point() + geom_text(size=20)\n",
"print(p)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "display_data",
"png": "iVBORw0KGgoAAAANSUhEUgAAAqQAAAHuCAYAAABJbR4+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xt01IWd/vFnMjMJScgkMzhoIooSkgjCSkO0iBRE8OhC\nrV0BXSpeUMLpVq3Kbsuu3R50q65svdTqVuXSults1YKKlmy81riiqFzkKiRoFAyiwASGhFyZ+f2x\nP7ObA5GQzOTz/Yb365w9ZGa+k318CPD0OzdPPB6PCwAAADCSYh0AAAAAJzYGKQAAAEwxSAEAAGCK\nQQoAAABTDFIAAACY8lkHSIZdu3ZZR+iQ1+tV//799dVXX+nw4cPWcbokNTVVzc3N1jGOm9u7d2vv\nEt1bonsbbu9donsrbu1d6rj7vLy8Y96XM6Q9LCUlRR6PRykp7q3e6/VaR+gSt3fv1t4lurdE9zbc\n3rtE91bc2rvUve7d+bsFAACAXoNBCgAAAFMMUgAAAJhikAIAAMAUgxQAAACmGKQAAAAwxSAFAACA\nKQYpAAAATDFIAQAAYIpBCgAAAFMMUgAAAJhikAIAAMAUgxQAAACmGKQAAAAwxSAFAACAKQYpAAAA\nTDFIAQAAYIpBCgAAAFMMUgAAAJhikAIAAMAUgxQAAACmGKQAAAAwxSAFAACAKQYpAAAATDFIAQAA\nYIpBCgAAAFMMUgAAAJhikAIAAMCUzzoAcDz++7//W08++aTWrFmjaDSqYDCos846SzfeeKMuuugi\n63gAAKALGKRwjbvvvluPP/648vLydMkllygUCmnfvn3auHGjVq1axSAFAMClGKRwhaeeekqPP/64\nrrzySv3bv/2bfL72P7qtra1GyQAAQHfxHFI4XlNTk+bPn68BAwYcdYxKOup1AADAHfhXHI731ltv\nKRKJaMqUKfJ4PHrttde0bds2paWl6Vvf+pZGjhxpHREAAHQDgxSOt379eklSamqqLrroIm3fvl2S\n5PF4JEmjRo3SggULFAqFzDICAICu4yF7ON7evXslSY8//rh27tzZdv3pp5+u0aNHa9WqVZo9e7ZV\nPAAA0E0MUjheLBZr+7qpqant688++0znnHOOcnNztWrVKq1Zs8YiHgAA6CYGKRwvOztbkhQOh4+4\nzefz6cILL5T0vw/tAwAAd2GQwvEGDx7c9mthYWHb9YWFhSotLVUgEJAkNTY2muQDAADdw4ua4Hhj\nxoyRx+PRp59+qhUrVmjRokWSpNLSUoVCIW3btk2SdNppp1nGBAAAXcQgheOdeuqpuvjii/XKK6/o\nueee09y5c9tuq6io0Jtvvqns7GyNHz/eMCUAAOgqBilc4Z577tGmTZt011136fXXX9fZZ5+tHTt2\n6OWXX5bf79f999+vvn37WscEAABdwCCFK+Tm5qq8vFwPPfSQXnnlFb333nvKysrSJZdcoptvvlnn\nnHOOdUQAANBFDFK4RigU0i9+8Qv94he/sI4CAAASiFfZAwAAwBSDFAAAAKYYpAAAADDFIAUAAIAp\nBikAAABMMUgBAABgikEKAAAAUwxSAAAAmGKQAgAAwBSDFAAAAKYYpAAAADDFIAUAAIApBikAAABM\nMUgBAABgikEKAAAAUwxSAAAAmGKQAgAAwBSDFAAAAKYYpAAAADDFIAUAAIApBikAAABMMUgBAABg\nymcdAPi//vznP2vVqlXavHmztmzZovr6ev3N3/yNHnnkEetoAAAgSRikcJSHH35YH330kfr27avc\n3Fxt375dHo/HOhYAAEgiBikc5a677lJeXp7OOOMMvfvuu5o2bZp1JAAAkGSOGaRVVVUqLy9XPB5X\ncXGxxowZc8Qx1dXVevnll3X48GFlZGRo5syZBkmRTKNHj277Oh6PGyYBAAA9xRGDNBaLqaysTNde\ne60CgYAWLFigoqIihcPhtmMaGhpUVlamGTNmKDs7W/
X19YaJAQAAkCiOeJV9TU2NQqGQgsGgvF6v\nhg0bpq1bt7Y7ZuPGjRoyZIiys7MlSZmZmRZR0UMikYj++Mc/Kh6Pq7m52ToOAABIIkecIY1Go21D\nU5ICgYBqamraHROJRHT48GE9+eSTampq0qhRo3TOOecoGo2qrq6u3bGNjY3KysrqkezHy+fztfvV\njbxer/x+f9K+/759+zRlyhRVVlZKkt58801Fo1H169evW9/X7d0nu/dkons7dG/D7b1LdG/Frb1L\n3eveEb9bnXkV9eHDh/XFF1/ouuuuU0tLixYtWqQBAwZow4YNqqioaHfs9OnTHX0G9dChQ0pLS1Na\nWpp1lC5L5h/0+++/v22MStLBgwf1u9/9TvPmzev293Z79279C1aie0t0b8PtvUt0b8WtvUtd794R\n/8VZWVk6cOBA2+VoNKpAINDumOzsbGVkZMjv98vv92vgwIHavXu3Ro4cqaKionbHNjY2OvY5pj6f\nT8FgULW1tWptbbWO0yVpaWlqampK2vdvaWk56nXd/T11e/fJ7j2Z6N4O3dtwe+8S3Vtxa+9Sx91n\nZGQc+77JDNZZeXl5ikQiqq2tVVZWljZt2qSpU6e2O6aoqEhlZWWKxWJqbW1VTU2Nzj//fAUCgSPG\n665du446apyktbXV8Rk74vP5kpr9hhtu0IoVK9rOkmZlZemGG25I2P9Pt3af7N57At3boXsbbu1d\nonsrbu9d6lr3jhikXq9XkyZN0pIlSxSLxVRcXKxwOKzVq1dLkkpKShQOhzV48GA99thj8ng8Ki4u\nVv/+/Y2TIxlCoZCWLVumefPm6bnnntOFF16oUChkHQsAACSJIwapJBUUFKigoKDddSUlJe0uX3DB\nBbrgggt6MhaMhEIhTZ8+Xc8//7xSU1Ot4wAAgCRyzCAFJKm8vFzl5eWSpD179kiS1qxZo9tuu02S\n1K9fP/385z83ywcAABKPQQpH2bJli5YuXdr2zgsej0c7duzQZ599Jkk67bTTGKQAAPQyDFI4ypw5\nczRnzhzrGAAAoAc54pOaAAAAcOJikAIAAMAUgxQAAACmGKQAAAAwxSAFAACAKQYpAAAATDFIAQAA\nYIpBCgAAAFMMUgAAAJhikAIAAMAUgxQAAACmGKQAAAAwxSAFAACAKQYpAAAATDFIAQAAYIpBCgAA\nAFMMUgAAAJhikAIAAMAUgxQAAACmGKQAAAAwxSAFAACAKQYpAAAATDFIAQAAYIpBCgAAAFMMUgAA\nAJhikAIAAMAUgxQAAACmGKQAAAAwxSAFAACAKQYpAAAATDFIAQAAYIpBCgAAAFMMUgAAAJjyxOPx\nuHWIRNu7d6+8Xq91jKPyeDxKTU1Vc3Oz3Fp9SkqKYrGYdYzj5vbu3dq7RPeW6N6G23uX6N6KW3uX\nOu4+GAwe876+ZAaz0tzcbB2hQ36/Xzk5Oaqvr1dLS4t1nC5JT09XQ0ODdYzj5vbu3dq7RPeW6N6G\n23uX6N6KW3uXOu6+M4OUh+wBAABgikEKAAAAUwxSAAAAmGKQAgAAwBSDFAAAAKYYpAAAADDFIAUA\nAIApBikAAABMMUgBAABgikEKAAAAUwxSAAAAmGKQAgAAwBSDFAAAAKYYpAAAADDFIAUAAIApn3UA\nHCkej+vFF1/U73//e1VXV2v//v06+eSTNXz4cM2ePVsjR460jggAAJAwDFIH+slPfqKnn35aoVBI\nl156qYLBoKqrq/XKK6+orKxMDz/8sK644grrmAAAAAnBIHWYzz//XE8//bT69++v1157TaFQqO22\nd955R1deeaXuv/9+BikAAOg1eA6pw+zbt0+S9K1vfavdGJWk0aNHKzMzU5FIxCIaAABAUjBIHaao\nqEj9+/fXunXrjhieq1atUn19vb7zne8YpQMAAEg8HrJ3mD59+mjx4sW65ZZbNG7cOOXm5qpPnz7q\n16+f3nrrLY0bN0
7z58+3jgkAAJAwDFIHGjJkiC677DI9+uij7c6SDhw4UNOmTTvioXwAAAA3Y5A6\nTGtrq6666iqtXr36iNvi8bhuvvlmbd68WT/72c8M0gEAACQezyF1mGXLlmnNmjXKz88/4rbvfve7\nOuWUU/TEE09ox44dBukAAAASj0HqMBs2bJAkXXXVVSosLGy7vrCwUH/3d3+nESNGKBaLafPmzVYR\nAQAAEoqH7B3G7/dLkhoaGrRs2TItXLhQklRaWqpQKNT2tlBfHwcAAOB2nCF1mK/f0umpp55Sc3Oz\n5s6dq7lz5yoUCumNN97QBx98oD59+qikpMQ4KQAAQGJwhtRhJkyYoEsvvVTl5eW68MILdemllyoc\nDquqqkqvvfaaPB6P7rjjDuXk5FhHBQAASAgGqQMtWLBAS5Ys0bJly1ReXq6GhgYFg0FNnDhRN9xw\ng8aOHWsdEQAAIGEYpA6UkpKia6+9Vtdee611FAAAgKTjOaQAAAAwxSAFAACAKQYpAAAATDFIAQAA\nYMoxL2qqqqpSeXm54vG4iouLNWbMmKMeV1NTo0WLFmnatGkaOnRoD6cEAABAojniDGksFlNZWZlm\nzJihm266SRs3btSePXuOetyrr76qwYMHG6QEAABAMjhikNbU1CgUCikYDMrr9WrYsGHaunXrEce9\n9957Gjp0qDIzMw1SAgAAIBkcMUij0aiys7PbLgcCAR08ePCIY7Zt26Zzzz23p+MBAAAgiRzxHFKP\nx3PMY8rLyzVx4kR5PB7F4/G266PRqOrq6tod29jYqKysrITnTASfz9fuVzfyer3y+/3WMY6b27t3\na+8S3Vuiextu712ieytu7V3qXveO+N3KysrSgQMH2i5Ho1EFAoF2x+zatUtLly6VJB06dEjbt29X\nSkqKvvjiC1VUVLQ7dty4cRo/fnzyg3dDMBi0jnDCons7dG+H7m3Qux26t9OV7h0xSPPy8hSJRFRb\nW6usrCxt2rRJU6dObXfMbbfd1vb1Cy+8oMLCQp111lnKy8tTUVFRu2MbGxuP+qIoJ/D5fAoGg6qt\nrVVra6t1nC5JS0tTU1OTdYzj5vbu3dq7RPeW6N6G23uX6N6KW3uXOu4+HA4f+77JDNZZXq9XkyZN\n0pIlSxSLxVRcXKxwOKzVq1dLkkpKSjq8byAQOOrZ1JaWlqRm7q7W1lbHZ+yIz+dzbXbJvd27vXeJ\n7i3RvQ239i7RvRW39y51rXtHDFJJKigoUEFBQbvrOhqi3//+93siEgAAAHqAI15lDwAAgBMXgxQA\nAACmGKQAAAAwxSAFAACAKQYpAAAATDFIAQAAYIpBCgAAAFMMUgAAAJhikAIAAMAUgxQAAACmGKQA\nAAAwxSAFAACAKQYpAAAATDFIAQAAYIpBCgAAAFMMUgAAAJhikAIAAMAUgxQAAACmGKQAAAAwxSAF\nAACAKQYpAAAATDFIAQAAYIpBCgAAAFMMUgAAAJhikAIAAMAUgxQAAACmGKQAAAAwxSAFAACAKQYp\nAAAATDFIAQAAYIpBCgAAAFMMUgAAAJhikAIAAMAUgxQAAACmGKQAAAAwxSAFAACAKQYpAAAATHni\n8XjcOkSi7d27V16v1zrGUXk8HqWmpqq5uVlurT4lJUWxWMw6xnFze/du7V2ie0t0b8PtvUt0b8Wt\nvUsddx8MBo95X18yg1lpbm62jtAhv9+vnJwc1dfXq6WlxTpOl6Snp6uhocE6xnFze/du7V2ie0t0\nb8PtvUt0b8WtvUsdd9+ZQcpD9gAAADDFIAUAAIApBikAAABMMUgBAABgqle+qAm9w7e//W3V1NQc\n9bZwOKx169b1cCIAAJAMDFI4WnZ2tmbNmnXE9RkZGQZpAABAMjBI4WiBQEC33367dQwAAJBEPIcU\nAAAApjhDCkdramrSsmXLVFNTo4yMDA0dOlSjRo1SSgr/WwoAgN6CQQrH8ng8+uqr
r3Trrbe2fQSZ\nx+PR6aefrgcffFCjRo0yTggAABKB00xwrKuuukqLFy/WmWee2XZdIBDQzp07NWPGDG3ZssUwHQAA\nSBQGKRzr9ttv14cffqhPPvmk7boDBw5oxIgRamxs1IMPPmiYDgAAJAqDFK4zbNgwSdJ7771nnAQA\nACQCgxSOVlpaqsLCwrbLhYWF+uEPfyhJOnTokFUsAACQQLyoCY4WCoW0bNkyLVy4UNL/DNQNGzZI\nkgYOHGgZDQAAJAiDFI60fft25eXlKSMjQ6FQSHPnzpUk7dy5Uz/72c8kSVdccYVlRAAAkCAMUjjS\n8uXL9cQTT2jUqFE69dRT1bdvX3366ad644031NTUpAkTJrQ9dA8AANyNQQpHuuCCC/TJJ59o06ZN\nWr16tQ4dOqTs7Gx9+9vf1pQpUzRlyhTriAAAIEEYpHCkUaNG8cb3AACcIHiVPQAAAEwdc5C+++67\nevDBB/XKK68ccdt9992XlFAAAAA4cXzjIP3973+vyZMnq6KiQtdff70mTZqkurq6ttvvueeepAcE\nAABA7/aNg/Tee+9VeXm5li9fro8//ljhcFjjx49XbW1tT+UDAABAL/eNg7SmpkbnnXeeJCk9PV3/\n8R//oQsvvFDjxo3Tl19+2SMBAQAA0Lt946vsTznlFFVWVrb76MZf/vKXysjI0He+8x21trYmPSAA\nAAB6t288Q/q9731Pf/jDH464/q677tL111+vpqampAUDAADAieEbz5Def//9kqS33npLY8eObXfb\nHXfcoTPOOCNpwQAAAHBi6NT7kE6ZMkU//elP1dLSIkmqra3VVVddpXnz5iU1HAAAAHq/Tg3S9evX\na/369Tr33HO1ePFiDR8+XDk5Ofrwww+TnQ8AAAC9XKcGaV5enl544QUdPnxYpaWlmjRpkp544gll\nZmYmOx8AAAB6uU4N0nXr1qmkpESDBg3S8uXL9frrr2v69Onav39/svMBAACgl+vUIJ04caJuv/12\nLV++XJdddpnWr1+vjIwMDR8+PNn5AAAA0Mt946vsv/b+++8rPz+/7XLfvn21ePFiLV++PGnBAAAA\ncGLo1BnS/ztG/6/LL788oWEAAABw4unUGdKeUFVVpfLycsXjcRUXF2vMmDHtbt+wYYNWrlypeDyu\ntLQ0TZ48WaeccopRWgAAACSKIwZpLBZTWVmZrr32WgUCAS1YsEBFRUUKh8NtxwSDQc2cOVN9+vRR\nVVWVXnrpJZWWlhqmBgAAQCJ06iH7ZKupqVEoFFIwGJTX69WwYcO0devWdsecdtpp6tOnjyRpwIAB\nikajFlEBAACQYI44QxqNRpWdnd12ORAIqKampsPj165dq4KCgrb71tXVtbu9sbFRWVlZyQnbTT6f\nr92vbuT1euX3+61jHDe3d+/W3iW6t0T3Ntzeu0T3Vtzau9S97h3xu+XxeDp9bHV1tdatW6cbb7xR\nkrRmzRpVVFS0O2b69OmOftP+Q4cOKS0tTWlpadZRusytf9Dd3r1be5fo3hLd23B77xLdW3Fr71LX\nu3fEf3FWVpYOHDjQdjkajSoQCBxx3O7du/Xiiy9qxowZSk9PlySNHDlSRUVF7Y5rbGxUfX19ckN3\nkc/nUzAYVG1trVpbW63jdElaWpqampqsYxw3t3fv1t4lurdE9zbc3rtE91bc2rvUcfcZGRnHvm8y\ng3VWXl6eIpGIamtrlZWVpU2bNmnq1Kntjtm/f7+eeeYZXXHFFerXr1/b9YFA4IjxumvXLrW0tPRI\n9q5qbW11fMaO+Hw+12aX3Nu923uX6N4S3dtwa+8S3Vtxe+9S17p3xCD1er2aNGmSlixZolgspuLi\nYoXDYa1evVqSVFJSooqKCjU2NmrFihWSpJSUFM2ePdsyNgAAABLAEYNUkgoKCtpeqPS1kpKStq8v\nv/xy3ogfAACgF3LE2z4BAADgxMUgBQAAgCkG
KQAAAEwxSAEAAGCKQQoAAABTDFIAAACYYpACAADA\nFIMUAAAAphikAAAAMMUgBQAAgCkGKQAAAEwxSAEAAGCKQQoAAABTDFIAAACYYpACAADAFIMUAAAA\nphikAAAAMMUgBQAAgCkGKQAAAEwxSAEAAGCKQQoAAABTDFIAAACYYpACAADAFIMUAAAAphikAAAA\nMMUgBQAAgCkGKQAAAEwxSAEAAGCKQQoAAABTDFIAAACYYpACAADAFIMUAAAAphikAAAAMMUgBQAA\ngCkGKQAAAEwxSAEAAGDKE4/H49YhEm3v3r3yer3WMY7K4/EoNTVVzc3Ncmv1KSkpisVi1jGOm9u7\nd2vvEt1bonsbbu9donsrbu1d6rj7YDB4zPv6khnMSnNzs3WEDvn9fuXk5Ki+vl4tLS3WcbokPT1d\nDQ0N1jGOm9u7d2vvEt1bonsbbu9donsrbu1d6rj7zgxSHrIHAACAKQYpAAAATDFIAQAAYIpBCgAA\nAFMMUgAAAJhikAIAAMAUgxQAAACmGKQAAAAwxSAFAACAKQYpAAAATDFIAQAAYIpBCgAAAFMMUgAA\nAJjyWQcAABzpmWee0d///d9/4zEpKSnasWNHDyUCgORhkAKAAw0bNqzDQbpq1SqtXLlSF110UQ+n\nAoDkYJACgAOdffbZOvvss49622WXXSZJuvrqq3syEgAkDc8hBQAX+eijj7Ru3Trl5uZq4sSJ1nEA\nICEYpADgIk899ZQkafr06fJ4PMZpACAxeMgeABwuEolo4cKFam1t1dKlS+Xz+TR9+nTrWACQMAxS\nAHCwSCSiKVOmqLKysu26Cy+8ULm5uYapACCxeMgeABxs4cKF7caoJGVlZRmlAYDkYJACgMucccYZ\n1hEAIKEYpADgYKWlpSosLGy73K9fP82ePdswEQAkHoMUABwsFArpD3/4g9LS0pSSkqJnnnlGoVDI\nOhYAJBSDFAAcbuXKlWpubtbEiRM1ZMgQ6zgAkHAMUgBwuK/fe5RPZgLQWzFIAcDBqqqq9MEHHygv\nL08TJkywjgMAScH7kAKAgxUUFOjzzz+3jgEAScUZUgAAAJhikAIAAMAUgxQAAACmGKQAAAAwxSAF\nAACAKQYpAAAATDFIAQAAYMox70NaVVWl8vJyxeNxFRcXa8yYMUccU1ZWpu3bt8vv9+v73/++cnNz\nDZICAAAgkRxxhjQWi6msrEwzZszQTTfdpI0bN2rPnj3tjqmsrFQkEtGPf/xjXXbZZfrzn/9slBYA\nAACJ5IhBWlNTo1AopGAwKK/Xq2HDhmnr1q3tjtm2bZtGjBghSRowYIAaGxtVV1dnERcAAAAJ5IiH\n7KPRqLKzs9suBwIB1dTUtDvm4MGDCgQC7Y6JRqOKxWJHDNPGxkZlZWUlN3QX+Xy+dr+6kdfrld/v\nt45x3NzevVt7l+jeEt3bcHvvEt1bcWvvUve6d8Tvlsfj6fJ916xZo4qKinbXjRs3TuPHj+9urKQK\nBoPWEU5YdG+H7u3QvQ16t0P3drrSvSMGaVZWlg4cONB2ORqNtjsb+k3HjBw5UkVFRe2ObWxsPOI5\nqE7h8/kUDAZVW1ur1tZW6zhdkpaWpqamJusYx83t3bu1d4nuLdG9Dbf3LtG9Fbf2LnXcfTgcPvZ9\nkxmss/Ly8hSJRFRbW6usrCxt2rRJU6dObXdMUVGR3n//fQ0fPlw7d+5Unz591LdvX0k6Yrzu2rVL\nLS0tPZa/K1pbWx2fsSM+n8+12SX3du/23iW6t0T3Ntzau0T3Vtzeu9S17h0xSL1eryZNmqQlS5Yo\nFoupuLhY4XBYq1evliSVlJSosLBQVVVVevjhh5WamqrLL7/cODUAAAASwRGDVJIKCgpUUFDQ7rqS\nkpJ2lydPntyTkQAAANADHPG2TwAAADhxMUgBAABgikEKAAAAUwxSAAAAmGKQAgAAwBSDFAAAAKYY\npAAAADDF
IAUAAIApBikAAABMMUgBAABgikEKAAAAUwxSAAAAmGKQAgAAwBSDFAAAoJt27dqlOXPm\nqLi4WIMGDdKoUaM0b948HThwwDqaK/isAwAAALjZp59+qssvv1z79u3TpZdeqvz8fK1bt06LFy/W\nm2++qRdeeEHBYNA6pqNxhhQAAKAb7rjjDu3bt0933323Fi1apH/6p3/Ss88+q9LSUn388ceaP3++\ndUTHY5ACAAB00aeffqq33npLp59+uq6//vp2t/3DP/yD0tPT9dxzz6mhocEmoEswSAEAALronXfe\nkSSNHTv2iNsyMzN17rnn6tChQ1qzZk1PR3MVBikAAEAXffzxx5KkQYMGKRKJaP78+Zo/f74ikYgk\n6cwzz5QkVVdXm2V0A17UBAAA0EUHDx6UJHm9Xk2ZMkWVlZWSpPLyci1btkxZWVmSpGg0apbRDThD\nCgAA0E0VFRVtY1SSKisrtXDhQsNE7sIgBQAA6KKvz4A2Nzcf9favz6AGAoEey+RGDFIAAIAuGjx4\nsCTplFNOUWFhYdv1hYWFKi0tbXvu6KBBg0zyuQXPIQUAAOii0aNHS5Lef/99vfTSS1q0aJEkqbS0\nVKmpqfrggw+UkZGh4uJiy5iOxyAFAADoooEDB2rcuHGqqKjQiy++qLlz57bdduedd6qhoUHXXHON\n0tPTDVM6H4MUAACgG+69915dfvnl+vnPf663335bgwcP1tq1a/Xuu+8qPz+/3UjF0fEcUgAAgG4Y\nOHCgysrKdOWVV2rdunVasGCBdu7cqVmzZumll15STk6OdUTH4wwpAABAN+Xl5enBBx+0juFanCEF\nAACAKQYpAAAATDFIAQAAYIpBCgAAAFMMUgAAAJhikAIAAMAUgxQAAACmGKQAAAAw5YnH43HrEIm2\nd+9eeb1e6xhH5fF4lJqaqubmZrm1+pSUFMViMesYx83t3bu1d4nuLdG9Dbf3LtG9Fbf2LnXcfTAY\nPOZ9e+UnNTU3N1tH6JDf71dOTo7q6+vV0tJiHadL0tPT1dDQYB3juLm9e7f2LtG9Jbq34fbeJbq3\n4tbepY6778wg5SF7AAAAmGKQAgAAwBSDFAAAAKYYpAAAADDFIAUAAIApBikAAABMMUgBAABgikEK\nAAAAUwxSAAAAmGKQAgAAwBSDFAAAAKYYpAAAADDFIAUAAIApBikAAABMMUgBOMayZcs0YMAADRgw\nQH/84x+t4wAAegiDFIAj1NTU6J//+Z+VmZkpSfJ4PMaJAAA9hUEKwFw8HtecOXMUCoV0zTXXWMcB\nAPQwBinWY2VLAAAP6UlEQVQAc4sXL9Y777yjhx56SOnp6dZxAAA9jEEKwFRVVZX+9V//VbNmzdJ5\n551nHQcAYIBBCsBMa2urfvSjHyk9PV0+n0+RSMQ6EgDAAIMUgJl7771XW7ZsUW1trX7zm99oypQp\namhosI4FAOhhPusAAE5Ma9eu1cKFC9tdV1lZqZycHKNEAAArnCEF0ONaW1t16623fuP4jMfjPZgI\nAGCJQQqgx9XX16u6ulq1tbVH3PbBBx9Ikn7yk59owIABmjdvXk/HAwD0MB6yB9Dj0tLSNH36dHk8\nHjU2Nmrr1q2SpLPOOkuVlZXatGmTvv3tbys/P18lJSXGaQEAycYgBdDj+vTpo1/+8pdHve2BBx7Q\npk2bNG3aNP3t3/5tDycDAFjgIXsAAACYYpACcBSPx8Pn2APACYaH7AE4ypw5czRnzhzrGACAHsQZ\nUgAAAJhikAIAAMAUD9kDAAAYu+eee7R+/XpVV1crEomoT58+ys3N1cUXX6wbbrhB4XDYOmJScYYU\nAADA2KJFi9TY2KiLLrpIs2bN0pQpU5SWlqZHHnlEEyZM0CeffGIdMak4QwoAAGBs27ZtSk1NVXp6\nuhoaGtqunz9/vh555BH9+7//ux544AHDhMnFGVIAAABjqampR73+u9/9ri
Tpyy+/7Mk4Pc78DOmh\nQ4e0dOlS7d+/Xzk5OZo2bZrS09PbHXPgwAE9//zzqq+vlySNHDlSo0aNsogLAADQY1599VVJ0vnn\nn2+cJLnMB+nbb7+tQYMGacyYMXr77bf19ttv6+KLL253TEpKii655BLl5uaqqalJCxYsUH5+fq9/\ngi8AADixzJ8/X3/5y1/U3NyslJQUbdy4UT/4wQ80e/Zs62hJZf6Q/bZt2zRixAhJ0jnnnKOtW7ce\ncUxWVpZyc3MlSWlpaTrppJN08ODBHs0JAACQTJFIRPfff7/ef/99ffjhh1q7dq2GDx+u733ve/L7\n/dbxksr8DGl9fb369u0rSerbt2/bw/Idqa2t1e7du3XqqadKkqLRqOrq6tod09jYqKysrOQE7iaf\nz9fuVzfyer2u/IPh9u7d2rtE95bo3obbe5fo3sJvf/tbHT58uN111dXV+sEPfqBf//rXmjZtmlGy\nzulO9z3yu/Wf//mfR4xGSbrooovaXT7W51c3NTXp2Wef1aWXXqq0tDRJ0po1a1RRUdHuuHHjxmn8\n+PHdTJ1cwWDQOsIJi+7t0L0durdB73bc2H1GRsYR11199dVauHCh7rrrLv3oRz8ySHX8utJ9jwzS\na6+9tsPbMjMzdfDgQWVlZengwYPKzMw86nGHDx/Ws88+q7/6q7/SkCFD2q4fOXKkioqK2h3b2Nio\nPXv2JCZ8gvl8PgWDQdXW1qq1tdU6TpekpaWpqanJOsZxc3v3bu1dontLdG/D7b1Lzuq+trZWK1as\n0KuvvqqPPvpIX375pfx+v4YMGaLp06dr+vTpbSe13Nz9jBkztGzZsranLxYWFuqmm27Sm2++qS1b\ntmjz5s3q37+/ccqOddR9Z17zY34+u6ioSOvXr9eYMWP04Ycf6qyzzjrimHg8ruXLlyscDh/xKrNA\nIKBAINDuul27dqmlpSWpuburtbXV8Rk74vP5XJtdcm/3bu9dontLdG/Drb1Lzur+ueee0x133KGT\nTz5Zo0eP1qmnnqo9e/bov/7rv3T77bfr1Vdf1YIFC9rdx43dBwIBvfTSS3rkkUckSaWlpQoEAtq9\ne7c8Ho/S0tJc8d/Ule7NB+mYMWP0pz/9SWvXrm172yfpf54b+tJLL+nqq6/Wjh07tGHDBp188sl6\n/PHHJUkTJkxQQUGBZXQAANAD8vPz9eSTT2rixIntrv/Hf/xHTZ48WWVlZSorK9OkSZOMEnbPJ598\nopNOOkmBQED9+vXT3LlzJUmxWEz33Xef9u3bp3Hjxh31If3ewnyQZmRk6Lrrrjvi+kAgoKuvvlqS\nNHDgQN155509nAwAADjBBRdccNTrw+GwrrnmGs2fP1+rVq1y7SB9/fXXdd999+m8887TGWecoUAg\noD179mjVqlXasWOHBgwYoPvuu886ZlKZD1IAAICu+voV3V6v1zhJ140dO1afffaZ3n//fW3evFkH\nDhxQ3759lZ+fr+nTp+uGG27o8DU2vQWDFAAAuEIkEtHChQsl/e/zK5cuXSpJjn93nW9SVFSku+++\nW5KO+Cz7EwWDFAAAOF4kEtGUKVNUWVkpSSovL9f555+vbdu2acKECRo7dqxxQnQHgxQAADjewoUL\n28aoJFVWVqqyslKFhYX69a9/bZgMiWD+0aEAAABd0a9fP/3pT39Sdna2dRR0E4MUAAA4XmlpqQoL\nC9sup6am6vnnn9dJJ51kmAqJwiAFAACOFwqF2t7WKRwO64033lB+fr5xKiQKgxQAADjeQw89pIcf\nflgjRoxQRUWFzjzzTOtISCBe1AQAABzt2Wef1QMPPCCv16tzzz1XixYtOuKY0047TVdeeaVBOiQC\ngxQAADja559/Lul/PkrzaGNUks4//3wGqYsxSAEAgKPNmTNHc+bMsY6BJOI5pAAAADDFIAUAAIAp\nBikAAABMMUgBAABgikEKAAAAUwxSAA
AAmGKQAgAAwBSDFAAAAKYYpAAAADDFIAUAAIApBikAAABM\nMUgBAABgikEKAAAAUwxSAAAAmGKQAgAAwBSDFAAAAKYYpAAAADDFIAUAAIApBikAAABMMUgBAABg\nikEKAAAAUwxSAAAAmGKQAgAAwBSDFAAAAKYYpAAAADDlicfjcesQibZ37155vV7rGEfl8XiUmpqq\n5uZmubX6lJQUxWIx6xjHze3du7V3ie4t0b0Nt/cu0b0Vt/Yuddx9MBg85n19yQxmpbm52TpCh/x+\nv3JyclRfX6+WlhbrOF2Snp6uhoYG6xjHze3du7V3ie4t0b0Nt/cu0b0Vt/Yuddx9ZwYpD9kDAADA\nFIMUAAAAphikAAAAMNUrX9TkZNFoVGvWrNHIkSMVCASs45xQ6N4O3duhexv0bofu7XSne86Q9rC6\nujpVVFSorq7OOsoJh+7t0L0durdB73bo3k53umeQAgAAwBSDFAAAAKYYpAAAADDlvfPOO++0DnEi\nicfjSk1N1RlnnKG0tDTrOCcUurdD93bo3ga926F7O93pnlfZAwAAwFSv/OhQp6iqqlJ5ebni8biK\ni4s1ZsyYdrfv2bNHy5cv1xdffKEJEyZo9OjRRkl7n2N1v2HDBq1cuVLxeFxpaWmaPHmyTjnlFKO0\nvcuxut+6dav+8pe/yOPxyOPx6OKLL9agQYOM0vYex+r9azU1NVq0aJGmTZumoUOH9nDK3ulY3VdX\nV+vpp59u+/jEIUOGaNy4cRZRe53O/NxXV1fr5Zdf1uHDh5WRkaGZM2caJO1djtX7ypUrtXHjRklS\nLBbTnj179NOf/lTp6ekdf9M4kuLw4cPxX/3qV/FIJBJvbW2N/+Y3v4l/9dVX7Y6pq6uLf/755/HX\nXnstvnLlSqOkvU9nut+xY0e8oaEhHo/H45WVlfEFCxZYRO11OtN9U1NT29e7d++O/+pXv+rpmL1O\nZ3r/+rjf/e538SVLlsQ3b95skLT36Uz3n3zySfypp54ySth7dab7Q4cOxR999NH4/v374/H4//y7\ni+7p7N83X9u6dWv8ySefPOb35UVNSVJTU6NQKKRgMCiv16thw4Zp69at7Y7JzMzUqaeeKq/Xa5Sy\nd+pM96eddpr69OkjSRowYICi0ahF1F6nM92npqa2fd3c3KyMjIyejtnrdKZ3SXrvvfc0dOhQZWZm\nGqTsnTrbPRKvM91v3LhRQ4YMUXZ2tiTxs58Ax/szv3HjRg0fPvyY35dBmiTRaLTtD4AkBQIBHTx4\n0DDRieN4u1+7dq0KCgp6Ilqv19nuP/roIz366KNasmSJ/vqv/7onI/ZKnek9Go1q27ZtOvfcc3s6\nXq/Wme49Ho927typxx57TEuWLNFXX33V0zF7pc50H4lE1NDQoCeffFJPPPGE1q9f39Mxe53j+Te2\nublZH3/8sYYMGXLM78tzSJPE4/FYRzhhHU/31dXVWrdunW688cYkJjpxdLb7IUOGaMiQIfrss8/0\n/PPP65Zbbklyst6tM72Xl5dr4sSJ8ng8ivNa1oTpTPe5ubm6/fbblZqaqqqqKj399NP68Y9/3APp\nerfOdH/48GF98cUXuu6669TS0qJFixZpwIAB6tevXw8k7J2O59/YyspKnX766d/83NH/j0GaJFlZ\nWTpw4EDb5Wg0ymfq9pDOdr979269+OKLmjFjRqf+sODYjvfnfuDAgYrFYjp06BAP3XdDZ3rftWuX\nli5dKkk6dOiQtm/frpSUFJ111lk9mrW36Uz3//ftbwoKCrRixQp+5hOgM91nZ2crIyNDfr9ffr9f\nAwcO1O7duxmk3XA8f89v2rRJw4YN69T35SH7JMnLy1MkElFtba1aW1u1adMmFRUVHfVYzlYkVme6\n379/v5555hldccUV/MWUQJ3pPhKJtP3M79q1S5L4h7mbOtP7bbfd1vZ/Q4cO1eTJkxmjCdCZ7uvq\n6t
p+5j///HPF43F+5hOgM90XFRVpx44disViam5uVk1NjcLhsFHi3qGz+6axsVGfffZZp/+e4Qxp\nkni9Xk2aNElLlixRLBZTcXGxwuGwVq9eLUkqKSnRwYMHtXDhQjU1Ncnj8WjVqlW66aabeCPfbupM\n9xUVFWpsbNSKFSskSSkpKZo9e7Zl7F6hM91v2bJF69evl9frVWpqqqZOnWqc2v060zuSo7M/8x98\n8IFSUlLk9/v5mU+QznQfDoc1ePBgPfbYY/J4PCouLlb//v2Nk7tbZ/++2bp1q/Lz8+X3+zv1fXlj\nfAAAAJjiIXsAAACYYpACAADAFIMUAAAAphikAAAAMMUgBQAAgCkGKQAAAEwxSAEAAGCKQQoADvbs\ns89q9OjRyszM1Pjx463jAEBS8ElNAOBg/fr105w5c/TRRx/pjTfesI4DAEnBGVIAMPbxxx+rX79+\nWrdunSRp165dCofDeuuttzRhwgRNnTpVubm5xikBIHkYpABgLD8/X/Pnz9eMGTPU0NCgmTNnaubM\nmRo7dqx1NADoEQxSAHCAWbNmafDgwTrvvPP05Zdf6p577rGOBAA9hkEKAA4xa9Ysbd68Wbfccov8\nfr91HADoMQxSAHCAuro63XbbbZo1a5bmzZun2tradrd7PB6jZACQfAxSAHCAW2+9Veedd54WLFig\nyZMn64c//KEkKRaLqbGxUS0tLYrFYmpqalJLS4txWgBILE88Ho9bhwCAE9ny5ct18803a+PGjcrJ\nyVF9fb1GjBihf/mXf1Fzc7NmzpzZ7vjrr79ev/3tb43SAkDiMUgBAABgiofsAQAAYIpBCgAAAFMM\nUgAAAJhikAIAAMAUgxQAAACmGKQAAAAwxSAFAACAKQYpAAAATP0/h1zZErICYogAAAAASUVORK5C\nYII=\n",
"text": [
"<matplotlib.figure.Figure at 0x1070a1850>"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"<ggplot: (275962833)>\n"
]
}
],
"prompt_number": 170
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Show both latent dimensions as weighted combinations of terms.\n",
"lsi.print_topics(lsi.num_topics)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 171,
"text": [
"[u'0.386*\"little\" + 0.350*\"book\" + 0.332*\"value\" + 0.306*\"stock\" + 0.269*\"market\" + 0.268*\"investing\" + 0.261*\"neatest\" + 0.219*\"dummies\" + 0.206*\"guide\" + 0.132*\"common\"',\n",
" u'0.436*\"edition\" + 0.327*\"estate,\" + 0.327*\"5th\" + 0.309*\"4th\" + 0.309*\"dummies,\" + 0.300*\"real\" + 0.176*\"rich\" + -0.147*\"value\" + -0.143*\"book\" + 0.131*\"investing\"']"
]
}
],
"prompt_number": 171
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- \u76f8\u4f3c\u6027\u67e5\u8be2"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Map the query document into the LSI space.\n",
"new_doc = \"Investing book\"\n",
"vec_bow = dictionary.doc2bow(new_doc.lower().split())\n",
"# The LSI model was fitted on TF-IDF weighted vectors, so the query has to be\n",
"# TF-IDF weighted too before it is projected; feeding raw counts into `lsi`\n",
"# projects the query from a different input space than the indexed documents.\n",
"vec_lsi = lsi[tfidf[vec_bow]]\n",
"print(vec_lsi)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[(0, 0.61768800065123997), (1, -0.012084299454548031)]\n"
]
}
],
"prompt_number": 176
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Build a similarity index over the corpus in LSI space.\n",
"# The indexed vectors have one feature per LSI topic, so size the index by the\n",
"# number of topics (2), not by the vocabulary size (43).\n",
"index = similarities.MatrixSimilarity(corpus_lsi, num_features=lsi.num_topics)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 178
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Query the index: cosine similarity between the query and every indexed document.\n",
"sims = index[vec_lsi]\n",
"# (document number, similarity) pairs in corpus order; in the recorded output\n",
"# documents 0, 7, 2 and 3 are the most similar to the query.\n",
"print(list(enumerate(sims)))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[(0, 0.99697202), (1, 0.26783881), (2, 0.94372416), (3, 0.96163654), (4, 0.89988339), (5, 0.54953808), (6, 0.24045581), (7, 0.9906559), (8, 0.22840852)]\n"
]
}
],
"prompt_number": 179
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Rank the documents from most to least similar.\n",
"# The ranking gets its own name: rebinding `sims` to the sorted list made this\n",
"# cell fail when re-run, because it would then enumerate (document, score)\n",
"# tuples instead of scores and try to negate a tuple.\n",
"sims_ranked = sorted(enumerate(sims), key=lambda item: item[1], reverse=True)\n",
"print(sims_ranked)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[(0, 0.99697202), (7, 0.9906559), (3, 0.96163654), (2, 0.94372416), (4, 0.89988339), (5, 0.54953808), (1, 0.26783881), (6, 0.24045581), (8, 0.22840852)]\n"
]
}
],
"prompt_number": 182
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# \u6a21\u578b\u4fdd\u5b58\n",
"#lsi.save('/tmp/model.lsi') # tfidf\uff0clda...\u4e5f\u4e00\u6837\n",
"#lsi = models.LsiModel.load('/tmp/model.lsi')"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- LDA \u4e3b\u9898\u6a21\u578b"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import numpy as np\n",
"\n",
"# LDA training is randomised; seed numpy's global generator so repeated runs\n",
"# give the same topics (presumably gensim draws from it - verify for the\n",
"# installed gensim version).\n",
"np.random.seed(0)\n",
"# Make several passes over this tiny corpus instead of the default single pass,\n",
"# which triggers gensim's \"too few updates, training might not converge\" warning.\n",
"# NOTE(review): LDA models word counts, so the raw bag-of-words `corpus` is the\n",
"# usual input; the TF-IDF corpus is kept here to stay consistent with the\n",
"# transformation applied in the following cell.\n",
"lda = models.LdaModel(corpus_tfidf, id2word=dictionary, num_topics=2, passes=20)\n",
"lda.print_topics(2)  # each topic as a probability distribution over terms"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stderr",
"text": [
"WARNING:gensim.models.ldamodel:too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 184,
"text": [
"[u'0.035*dummies + 0.035*value + 0.033*investing + 0.031*stock + 0.031*little + 0.031*book + 0.030*edition + 0.028*estate, + 0.028*5th + 0.026*rich',\n",
" u'0.029*little + 0.029*stock + 0.028*market + 0.028*investing + 0.028*neatest + 0.027*guide + 0.026*book + 0.026*edition + 0.026*value + 0.026*dummies,']"
]
}
],
"prompt_number": 184
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Transform the TF-IDF corpus into the 2-topic LDA space.\n",
"corpus_lda = lda[corpus_tfidf]\n",
"# Each document's topic mixture: (topic id, probability) pairs.\n",
"for doc in corpus_lda:\n",
"    print(doc)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[(0, 0.61454672021557166), (1, 0.38545327978442823)]\n",
"[(0, 0.22675513712573009), (1, 0.77324486287426986)]\n",
"[(0, 0.81528087845709996), (1, 0.18471912154290002)]\n",
"[(0, 0.30096891139721366), (1, 0.69903108860278629)]\n",
"[(0, 0.20729131202399478), (1, 0.79270868797600513)]\n",
"[(0, 0.78866099283751523), (1, 0.21133900716248485)]\n",
"[(0, 0.73842987801214077), (1, 0.26157012198785928)]\n",
"[(0, 0.40103949433052083), (1, 0.59896050566947912)]\n",
"[(0, 0.78546668424881949), (1, 0.21453331575118056)]\n"
]
}
],
"prompt_number": 173
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#model = models.HdpModel(corpus_tfidf, id2word=dictionary) # LDA\u7684\u6269\u5c55-HDP \u4e0d\u9700\u8981\u4e3b\u9898\u6570"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 175
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment