Skip to content

Instantly share code, notes, and snippets.

@rokroskar
Last active August 29, 2015 14:15
Show Gist options
  • Save rokroskar/331f5cf0fc9f808d8239 to your computer and use it in GitHub Desktop.
Save rokroskar/331f5cf0fc9f808d8239 to your computer and use it in GitHub Desktop.
lasso trained on demeaned dataset
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "",
"signature": "sha256:193adf4b9d9ff01020cd01029b5bb5f28e30f02cd75bedb9df5b9e1afdced6ad"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import time\n",
"print 'starting job at %s'%(time.strftime(\"%d/%m/%Y %H:%M:%S\"))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"starting job at 19/02/2015 16:48:22\n"
]
}
],
"prompt_number": 174
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%matplotlib inline\n",
"import matplotlib.pylab as plt\n",
"plt.rcParams['figure.figsize'] = (10,6)\n",
"plt.rcParams['font.size'] = 18"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 175
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Process full Bloomberg corpus using Spark"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##Set up the environment and Spark runtime"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import os\n",
"os.environ['SPARK_HOME'] = '%s/spark'%os.environ['HOME']\n",
"spark_home = os.environ['SPARK_HOME']"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 176
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##Initialize the Spark Context"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import sys\n",
"sys.path.insert(0,os.environ['SPARK_HOME']+'/python')\n",
"sys.path.insert(0,os.environ['SPARK_HOME']+'/python/lib/py4j-0.8.2.1-src.zip')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 177
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"try :\n",
" reload(pyspark)\n",
"except : \n",
" import pyspark\n",
"from pyspark import SparkContext"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 178
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### set up the configuration"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"conf = pyspark.SparkConf()\n",
"conf.set('spark.executor.instances', '100')\n",
"conf.set('spark.executor.cores', '4')\n",
"conf.set('spark.executor.memory', '7g')\n",
"conf.set('spark.driver.memory', '6g')\n",
"#conf.set('spark.storage.memoryFraction', '0.3')\n",
"#conf.set('spark.shuffle.memoryFraction', '0.7')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 179,
"text": [
"<pyspark.conf.SparkConf at 0x2b72b0f5fe10>"
]
}
],
"prompt_number": 179
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"try : \n",
" sc.stop()\n",
"except : \n",
" pass\n",
"\n",
"sc = SparkContext(master = 'yarn-client', appName = 'Case Notebook', batchSize=100, conf = conf)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 180
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we've got a ``SparkContext`` initialized, allowing us to distribute the computation across the set of compute nodes. "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##Start analyzing case data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Set up some constants: "
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import bloomberg_ngrams, sparkgram\n",
"JUDGE_BIO_PATH = '~/work/chen_opinions/inputs/JudgesBioReshaped_TOUSE.csv'\n",
"CASE_DB_PATH = '~/work/chen_opinions/inputs/BloombergCASELEVEL_Touse.csv'\n",
"import sklearn\n",
"sw = sklearn.feature_extraction.text.ENGLISH_STOP_WORDS"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 181
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"sw"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 316,
"text": [
"frozenset({'a',\n",
" 'about',\n",
" 'above',\n",
" 'across',\n",
" 'after',\n",
" 'afterwards',\n",
" 'again',\n",
" 'against',\n",
" 'all',\n",
" 'almost',\n",
" 'alone',\n",
" 'along',\n",
" 'already',\n",
" 'also',\n",
" 'although',\n",
" 'always',\n",
" 'am',\n",
" 'among',\n",
" 'amongst',\n",
" 'amoungst',\n",
" 'amount',\n",
" 'an',\n",
" 'and',\n",
" 'another',\n",
" 'any',\n",
" 'anyhow',\n",
" 'anyone',\n",
" 'anything',\n",
" 'anyway',\n",
" 'anywhere',\n",
" 'are',\n",
" 'around',\n",
" 'as',\n",
" 'at',\n",
" 'back',\n",
" 'be',\n",
" 'became',\n",
" 'because',\n",
" 'become',\n",
" 'becomes',\n",
" 'becoming',\n",
" 'been',\n",
" 'before',\n",
" 'beforehand',\n",
" 'behind',\n",
" 'being',\n",
" 'below',\n",
" 'beside',\n",
" 'besides',\n",
" 'between',\n",
" 'beyond',\n",
" 'bill',\n",
" 'both',\n",
" 'bottom',\n",
" 'but',\n",
" 'by',\n",
" 'call',\n",
" 'can',\n",
" 'cannot',\n",
" 'cant',\n",
" 'co',\n",
" 'con',\n",
" 'could',\n",
" 'couldnt',\n",
" 'cry',\n",
" 'de',\n",
" 'describe',\n",
" 'detail',\n",
" 'do',\n",
" 'done',\n",
" 'down',\n",
" 'due',\n",
" 'during',\n",
" 'each',\n",
" 'eg',\n",
" 'eight',\n",
" 'either',\n",
" 'eleven',\n",
" 'else',\n",
" 'elsewhere',\n",
" 'empty',\n",
" 'enough',\n",
" 'etc',\n",
" 'even',\n",
" 'ever',\n",
" 'every',\n",
" 'everyone',\n",
" 'everything',\n",
" 'everywhere',\n",
" 'except',\n",
" 'few',\n",
" 'fifteen',\n",
" 'fify',\n",
" 'fill',\n",
" 'find',\n",
" 'fire',\n",
" 'first',\n",
" 'five',\n",
" 'for',\n",
" 'former',\n",
" 'formerly',\n",
" 'forty',\n",
" 'found',\n",
" 'four',\n",
" 'from',\n",
" 'front',\n",
" 'full',\n",
" 'further',\n",
" 'get',\n",
" 'give',\n",
" 'go',\n",
" 'had',\n",
" 'has',\n",
" 'hasnt',\n",
" 'have',\n",
" 'he',\n",
" 'hence',\n",
" 'her',\n",
" 'here',\n",
" 'hereafter',\n",
" 'hereby',\n",
" 'herein',\n",
" 'hereupon',\n",
" 'hers',\n",
" 'herself',\n",
" 'him',\n",
" 'himself',\n",
" 'his',\n",
" 'how',\n",
" 'however',\n",
" 'hundred',\n",
" 'i',\n",
" 'ie',\n",
" 'if',\n",
" 'in',\n",
" 'inc',\n",
" 'indeed',\n",
" 'interest',\n",
" 'into',\n",
" 'is',\n",
" 'it',\n",
" 'its',\n",
" 'itself',\n",
" 'keep',\n",
" 'last',\n",
" 'latter',\n",
" 'latterly',\n",
" 'least',\n",
" 'less',\n",
" 'ltd',\n",
" 'made',\n",
" 'many',\n",
" 'may',\n",
" 'me',\n",
" 'meanwhile',\n",
" 'might',\n",
" 'mill',\n",
" 'mine',\n",
" 'more',\n",
" 'moreover',\n",
" 'most',\n",
" 'mostly',\n",
" 'move',\n",
" 'much',\n",
" 'must',\n",
" 'my',\n",
" 'myself',\n",
" 'name',\n",
" 'namely',\n",
" 'neither',\n",
" 'never',\n",
" 'nevertheless',\n",
" 'next',\n",
" 'nine',\n",
" 'no',\n",
" 'nobody',\n",
" 'none',\n",
" 'noone',\n",
" 'nor',\n",
" 'not',\n",
" 'nothing',\n",
" 'now',\n",
" 'nowhere',\n",
" 'of',\n",
" 'off',\n",
" 'often',\n",
" 'on',\n",
" 'once',\n",
" 'one',\n",
" 'only',\n",
" 'onto',\n",
" 'or',\n",
" 'other',\n",
" 'others',\n",
" 'otherwise',\n",
" 'our',\n",
" 'ours',\n",
" 'ourselves',\n",
" 'out',\n",
" 'over',\n",
" 'own',\n",
" 'part',\n",
" 'per',\n",
" 'perhaps',\n",
" 'please',\n",
" 'put',\n",
" 'rather',\n",
" 're',\n",
" 'same',\n",
" 'see',\n",
" 'seem',\n",
" 'seemed',\n",
" 'seeming',\n",
" 'seems',\n",
" 'serious',\n",
" 'several',\n",
" 'she',\n",
" 'should',\n",
" 'show',\n",
" 'side',\n",
" 'since',\n",
" 'sincere',\n",
" 'six',\n",
" 'sixty',\n",
" 'so',\n",
" 'some',\n",
" 'somehow',\n",
" 'someone',\n",
" 'something',\n",
" 'sometime',\n",
" 'sometimes',\n",
" 'somewhere',\n",
" 'still',\n",
" 'such',\n",
" 'system',\n",
" 'take',\n",
" 'ten',\n",
" 'than',\n",
" 'that',\n",
" 'the',\n",
" 'their',\n",
" 'them',\n",
" 'themselves',\n",
" 'then',\n",
" 'thence',\n",
" 'there',\n",
" 'thereafter',\n",
" 'thereby',\n",
" 'therefore',\n",
" 'therein',\n",
" 'thereupon',\n",
" 'these',\n",
" 'they',\n",
" 'thick',\n",
" 'thin',\n",
" 'third',\n",
" 'this',\n",
" 'those',\n",
" 'though',\n",
" 'three',\n",
" 'through',\n",
" 'throughout',\n",
" 'thru',\n",
" 'thus',\n",
" 'to',\n",
" 'together',\n",
" 'too',\n",
" 'top',\n",
" 'toward',\n",
" 'towards',\n",
" 'twelve',\n",
" 'twenty',\n",
" 'two',\n",
" 'un',\n",
" 'under',\n",
" 'until',\n",
" 'up',\n",
" 'upon',\n",
" 'us',\n",
" 'very',\n",
" 'via',\n",
" 'was',\n",
" 'we',\n",
" 'well',\n",
" 'were',\n",
" 'what',\n",
" 'whatever',\n",
" 'when',\n",
" 'whence',\n",
" 'whenever',\n",
" 'where',\n",
" 'whereafter',\n",
" 'whereas',\n",
" 'whereby',\n",
" 'wherein',\n",
" 'whereupon',\n",
" 'wherever',\n",
" 'whether',\n",
" 'which',\n",
" 'while',\n",
" 'whither',\n",
" 'who',\n",
" 'whoever',\n",
" 'whole',\n",
" 'whom',\n",
" 'whose',\n",
" 'why',\n",
" 'will',\n",
" 'with',\n",
" 'within',\n",
" 'without',\n",
" 'would',\n",
" 'yet',\n",
" 'you',\n",
" 'your',\n",
" 'yours',\n",
" 'yourself',\n",
" 'yourselves'})"
]
}
],
"prompt_number": 316
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"year_range = [str(x) for x in range(1880,2014)]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 182
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"len(year_range)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 183,
"text": [
"134"
]
}
],
"prompt_number": 183
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we can initialize the ``SparkBloombergCaseVectorizer``.\n",
"\n",
"It is highly recommended to specify the ``numPartitions`` to be something like 4-5 times the number of cores. This splits up the shuffling tasks that can otherwise lead to memory problems. If the spark jobs start dying off with obscure memory or IO errors, try increasing the number of partitions. "
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"reload(sparkgram)\n",
"reload(sparkgram.document_vectorizer)\n",
"reload(bloomberg_ngrams)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 184,
"text": [
"<module 'bloomberg_ngrams' from '/cluster/home03/sdid/roskarr/python/bloomberg_ngrams/bloomberg_ngrams.pyc'>"
]
}
],
"prompt_number": 184
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create the RDDs needed for training the model -- only balanced panels, only rep/dem labels"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cv = bloomberg_ngrams.SparkBloombergCaseVectorizer(sc, year_range, JUDGE_BIO_PATH, CASE_DB_PATH, \n",
" ngram_range=[1,8], stop_words = sw, num_partitions=sc.defaultParallelism*2,\n",
" tokenizer = sparkgram.document_vectorizer.StemTokenizer(), \n",
" load_path = 'hdfs:///user/roskarr/raw')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Loaded 1 RDDs: \n",
"doc_rdd\n"
]
}
],
"prompt_number": 278
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now lets do the processing step by step to see how long each stage takes. First, construct the ``doc_rdd`` which reads all the case files and extracts the text: "
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"parse_context = bloomberg_ngrams.SparkBloombergCaseVectorizer.parse_context"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 279
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"homogeneous_panels = set(open('/cluster/home03/sdid/roskarr/work/chen_opinions/inputs/ddd_rrr_cases.txt', 'r')\\\n",
" .read().replace('\\n',' ')\\\n",
" .split())"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 280
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"###Only keep the cases that are:\n",
"1. labeled either rep or dem \n",
"2. are on the `homogeneous_panels` list\n",
"3. occur in more than five documents"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cv.doc_rdd = cv.doc_rdd.filter(lambda (context,_): parse_context(context, 'caseid') in homogeneous_panels)\\\n",
" .filter(lambda (context,text) : context.split('||')[10] == '0' or context.split('||')[10] == '1')\n",
"del(cv.ngram_rdd)\n",
"del(cv.vocab_rdd)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 281
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from operator import add\n",
"import numpy as np"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 282
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cv.ngram_rdd = cv.ngram_rdd\\\n",
" .flatMap(lambda (context, ngrams): [(ngram, (context, count)) for (ngram,count) in ngrams if (count >= 2) & (len(ngram) > 1)])\\\n",
" .aggregateByKey([], lambda a,b : a+[b], add)\\\n",
" .flatMap(lambda (ngram, meta): [(context, (ngram,count)) for (context, count) in meta if len(meta) > 5])\\\n",
" .aggregateByKey([], lambda a,b : a+[b], add)\\\n",
" .cache()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 283
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Subtract the circuit-year means: \n",
"\n",
"```\n",
"for each year : \n",
"\tfor each circuit: \n",
"\t\t\t\n",
"\t\tCOMPUTE a vector of means representing the mean counts for each feature \n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To generate an RDD of circuit-year sums:\n",
"\n",
"* map the `docvec_rdd` keys to `(year, circuit)` \n",
"* convert the `SparseVector` values to `scipy` `csr_matrix` and do a `reduceByKey` to add up all the circuit-year counts\n",
"\n",
"Then, to get the demeaned dataset: \n",
"* count the total number of opinions in each circuit-year and create an RDD of means\n",
"* create a demeaned RDD by subtracting the appropriate circuit-year mean from each opinion feature vector"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from scipy.sparse import csr_matrix, csc_matrix"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 284
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"circ_year_rdd = cv.docvec_rdd.map(lambda (context, vec): ((parse_context(context,'year'),parse_context(context,'circuit')), vec))\n",
"sums_rdd = circ_year_rdd.map(lambda (context, vec): (context, csr_matrix(vec.toArray()))).reduceByKey(add)\n",
"circ_year_counts = circ_year_rdd.countByKey()\n",
"means_rdd = sums_rdd.map(lambda ((year, circ), vec): ((year,circ), vec / circ_year_counts[(year, circ)]))\n",
"means_map = means_rdd.collectAsMap()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 289
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def subtract_mean(iterator, means_map) : \n",
" for context, vec in iterator :\n",
" year_circ_mean = means_map[(parse_context(context,'year'), parse_context(context,'circuit'))]\n",
" yield (context, csr_matrix(vec.toArray()) - year_circ_mean)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 290
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"demeaned_rdd = cv.docvec_rdd.coalesce(50)\\\n",
" .mapPartitions(lambda iterator: subtract_mean(iterator, means_map))\n",
"# .mapValues(lambda vec: vec.transpose())"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 291
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Output the data generated so far -- demeaned dataset, and the associated ngram_rdd and docvec_rdd"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from sparkgram import document_vectorizer"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 292
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"output_ngram = True\n",
"output_docvec = True\n",
"output_demeaned = True\n",
"output_vocab_map = True"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 293
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"if output_ngram: \n",
" document_vectorizer.SparkDocumentVectorizer.write_rdd(cv.ngram_rdd.coalesce(50), \n",
" 'hdfs:///user/roskarr/1-8gram_filtered_min5/training_set/ngram_rdd', \n",
" out_type = 'pickleFile', db_path = '/cluster/home03/sdid/roskarr/python/bloomberg_ngrams/rdds.db', \n",
" db_fields={'description': 'ngram RDD made of only balanced panels and min 2 occ. in document plus min. 5 in corpus'})\n",
"\n",
"if output_docvec:\n",
" document_vectorizer.SparkDocumentVectorizer.write_rdd(cv.docvec_rdd.coalesce(50), \n",
" 'hdfs:///user/roskarr/1-8gram_filtered_min5/training_set/docvec_rdd', \n",
" out_type = 'pickleFile', db_path = '/cluster/home03/sdid/roskarr/python/bloomberg_ngrams/rdds.db', \n",
" db_fields={'description': 'docvec RDD made of only balanced panels and min 2 occ. in document plus min. 5 in corpus'})\n",
" \n",
"if output_demeaned:\n",
" document_vectorizer.SparkDocumentVectorizer.write_rdd(demeaned_rdd, \n",
" 'hdfs:///user/roskarr/1-8gram_filtered_min5/training_set/demeaned_rdd', \n",
" out_type = 'pickleFile', db_path = '/cluster/home03/sdid/roskarr/python/bloomberg_ngrams/rdds.db', \n",
" db_fields={'description': 'demeaned RDD made of only balanced panels and min 2 occ. in document plus min. 5 in corpus'})\n",
" \n",
"if output_vocab_map:\n",
" document_vectorizer.SparkDocumentVectorizer.write_rdd(cv.vocab_map_rdd.coalesce(50), \n",
" 'hdfs:///user/roskarr/1-8gram_filtered_min5/training_set/vocabmap_rdd', \n",
" out_type = 'pickleFile', db_path = '/cluster/home03/sdid/roskarr/python/bloomberg_ngrams/rdds.db', \n",
" db_fields={'description': 'vocabmap RDD made of only balanced panels and min 2 occ. in document plus min. 5 in corpus'}) "
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 296
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Train the model on the demeaned dataset"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from pyspark.mllib.regression import LabeledPoint\n",
"from pyspark.mllib.util import MLUtils\n",
"from pyspark.mllib.classification import LogisticRegressionWithSGD, LogisticRegressionWithLBFGS, NaiveBayes, SVMWithSGD\n",
"from pyspark.mllib.regression import LassoWithSGD, RidgeRegressionWithSGD, LinearRegressionWithSGD"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 299
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Make an RDD of labeled points using the party labels"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"labeled_rdd = demeaned_rdd.map(lambda (context,vec) : LabeledPoint(parse_context(context,'party'),vec.transpose())).repartition(400).cache()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 347
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%time model_log_lasso = LogisticRegressionWithSGD.train(labeled_rdd, regType='l1', regParam=.001)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"CPU times: user 7.62 s, sys: 3.99 s, total: 11.6 s\n",
"Wall time: 9min 33s\n"
]
}
],
"prompt_number": 348
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from cPickle import dump"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 150
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"with open('/cluster/home03/sdid/roskarr/work/chen_opinions/model_log_lasso.dump','w') as f: \n",
" dump(model_log_lasso,f)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 151
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Plot the non-zero weights distribution to make sure there is some sort of even distribution"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from scipy.stats import gaussian_kde as kde\n",
"k = kde(model_log_lasso.weights[np.where(np.array(model_log_lasso.weights) != 0.0)])\n",
"xs = np.linspace(-.05,.05, 1000)\n",
"plt.plot(xs,k(xs))\n",
"plt.semilogy()\n",
"plt.title('feature weight distribution')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 349,
"text": [
"<matplotlib.text.Text at 0x2b72bb3bca90>"
]
},
{
"metadata": {},
"output_type": "display_data",
"png": "iVBORw0KGgoAAAANSUhEUgAAAncAAAGICAYAAAAj5sYzAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XeYVOXZx/HvTRdBUVEDiCBiAUsQW+yrxl4SezcaRY2x\nm1gTXEyMRqOixlgTNQYNIOqrsbexG1RsKDZKVLCjFOns8/5xn9FhmN2d2Z2ZM3Pm97muuWb3nDPn\n3PPs7Mw9T7UQAiIiIiKSDG3iDkBEREREikfJnYiIiEiCKLkTERERSRAldyIiIiIJouROREREJEGU\n3ImIiIgkiJI7qQlmdoCZjTWz78yswcwa4o5JWsbMUtHfcLsinW9KdL7Vi3G+YmgspkqJ1cz6RnFM\nzmd7XCotHpFyUXIniWdmmwB3AhsCTwG3Rrdyx1HUpKSGhYxbMc+ZtzIlDbliavXzNrNbo9h/0Zrz\nZMRTyPaiKuC5aEJXqSnt4g5ApAz2xr/IXBJCqI85lmInJbXoSGAZ4OMYYwhZ9+WyA9AemFaEc7Um\n9k+AdYGFRYijGBp7LpUWp0hZKLmTWrBadF8JTTMW3aSFQghxJnVpsfwNQwjFfA23+DmEEBYB7xcx\nltbK+VwqME6RslCzrCSWmdVHfeuOijbdku5vZ2YXZB27ftTE85GZzTezr8zsP401oZrZTmb2NzN7\n08ymm9k8M5tkZtfl6CfVN4pj22jTUxlxNJjZttFxR0W/39LU88kR+/fbzWxNM/uXmX1qZovM7NSM\n47qY2XlmNs7MZkX9D18zszPNrH0B5fpQdL2ts7ZvkPGcDs/a19XMFprZ1BznW9nMLjGzt81sjpnN\nNLMXzeyYRq7faPO2mfUws5uj5z/XzN4xs7PMrG0e/dXMzHY3s2ej8pkZPdeNsg6qByZFv6abZ9O3\nvJMvM1vVzG7IiHWCmZ1rZo1+6W6iL14PM7ssKsOZUfxTzOxeM9sv47gGvOYTlvx/+L5pM7PJ2cza\nRa+Zt6MYX8s+polY20aPfS/6/5hmZteb2co5js352s7Yv1Tza6HPpZHzbmhmI8xsqvn//WdmdreZ\nbdnI8eny75PPa0UkLqq5kyR7DbgN2BpYE3gO+DBjHwBRIvIP/P/hdeBFoCewC7CbmZ0YQrgh69zX\nAT2At4En8KayQcDxwAFmtmUIIV1jMCuKY1dgVeBh4LOMc2X+DM03lzW2f23gFWAGkAKWBb6LnmNv\n4LHomE+j/QHYArgM2MPMdgkh5NN89QReNjvgZZq2Y8bPOwD/yvh9O6At8GTmiczsx3h5rApMiX7u\nHMV1k5ltH0JYIlGMLNW8bWarAS/gNbVTgXuB5YF6YNNcj8l8OHAC8FvgeeABYHD0PLc2s8EhhA+i\nY18DxgD74eU7OuM8XzVy/iUvZtYrus7qeBPrvUA34IKMWBurWct+3j2imFbBk87H8GbI3sBPgQ5R\nvND4/wPAByypTRTXDvjr5c3oXI3GkvXYu/DX/JNRfNsCxwG7RP8fnzb33JrZX8hzWeq8ZrYv3he3\nPf5//xTQD/g5sLeZnRRCuL6RGPJ9rYjEI4Sgm26JvuGDJxqAI3PsGwQsAKYD22ft2zzaPh9YO2vf\nXkDXrG1t8ESiAXgox7VS0b5tG4nzqGj/PxrZnz730Ea2NwA3AG2z9hvwUrT/UqB9xr7lgYeifcPy\nLM/B0fGprO33AXOB94D/Ze27InrM0RnbOuNN5YuBU7OO74knqks8pqlyjK7fAIwCOmRsXwtP9hqi\na62e9bgp0b45wDYZ29sBd0f7/p71mD7R9kktfE3eGz3+XqBjxvYBeLLfVKxLbMcTwgbgrzmusyyw\neb7/D9H+vhmvp4nZMWQdM6mJx04F1srY1wn4v2jfmHxe283FXMBzyY6zBzAzKsshWft+jifHC4AN\nWvta0U23OG5qlpVadx7+xnx6COGpzB0hhP8Cf8C/2R
+fte/+EMKsrG0NwQdsTAN+ambLljLwHL7C\nn8firO27AZvhydhZIaN2LoQwAzga/yA7Mc/rvAZ8C2xuZp3Am+DwmpkXgAeB3mbWP+MxO+A1Hpk1\nd0fhSdI/QwhXZV4ghDANGBL9elJzAZlZX2BPYB5wUghhQca5PsD/js0ZHkJ4NuNxi4CLol/rsi+Z\nx/kai7UPPshnPnBiCGF+xjUnAH8s8JSrRPePZe8IIXwXvY5b6twQwkctfOwfQkYNVghhHvBrYBHw\nsyaax8thCNAFeDyEcFPmjhDCvXitczvglEYeX8hrRaTslNxJzTKzNnhTyiLgnkYOS7+Bb57j8X3M\n7EQzG25mf4/6Bd2Kfyi0BfpnP6bEHg8hzMmxfbfofkyOfYQQPsObtFYys7Wau0gIIeC1Zx3xZjGA\njYHl8OQtncDtAGBm3fFpaKaEEP6XI667GrnU63iz54Zmlt0cmG2b6P6ZEMIXOfbf0czjwWsws6Wb\n1nvm8fh8pftePhMlsdluL/B8L0f3F5vZXmbWueWhLSHgNW0tfeyIpTaG8An+2mnDD6+dOKT/Brc1\nsv8fWcdlK9drRaRFKq7PnZn9Df8GvjzexPMwcFoI4dtYA5MkWgnoGv38rVmTlTFLdAI3sz8C57D0\nF6TMvlLLFSHGQvyvke39ovtrzOyaJh4fgO4s3V8plyfx5qsdgcf5ob/dk3g/xMXRthuB7TP25Yrr\n/mbKPuB/q1x9tNJ6Rfc5yyCEMNPMZvLD3zvXNZYahRtCmBXF1lxyWYh0rFNyBhLCjGZizXYbXlt0\nJJ6MLTKzN/E+ZP8KIbzRwji/yKxVLNC32TXbGdJ/o16N7C+H9LUbGxAyOeu4TOV8rYi0SMUld8A1\nwJkhhLlmthLeWfli4FfxhiUJ1Da6X0DzNTvfd5Q3s/3x5twZwGn4h+in6eZOM3sB+AnFny6juZr2\nuY1sTz/PJ2h+briv84wlnailE7cd8IEjY0MIi83sVX5ontoh6zHZcf0f8E0z11vQzP60pjrkN7cq\nSVWuWhLVpB5lZn/GvxhvD2yJ940808z+EELIOQq1GY29nuJQaa1MVflakdpRccld1OckrQ3+T/RJ\nTOFIsn2F93tqCxwf8hspCrB/dH9+CCFXs05Lm2PTCUyXRvb3buF50wndHSGEnNOsFCqE8I6ZfQEM\njqa22Ap4MqO/31PAZma2Ibn726XjWhu4Oru/Ywukp1jpk2unmS2Hj0athAmk0+9nfXPtNLNueK1v\nQbFG750TgMuiPpD74wMOfmdmd4QQ3mtpwC3Qzcy6NlJ71ze6z5wWp1Sv/cZMBdbBR9q+mGN/v4zj\nRKpOpX0bAsDMzjGzWcDn+GjFP8UckiRQ1An6MfxLzj4FPHTF6H6pLx1mtiPetJnrgzn9AdbYl6r0\nB8k6Oc7bgZZ31E73DzqghY9vzJP4czkXHwmZmbw9Ed3/Ah+t+m4I4fMSxpWekmVbM1slx/5DinCN\nTM39LZuS7se5bTSNSbbDWhbSD0IIi0MII6NrGbB+xu7WxJ4vAw5daqNPAbMt/qU9cxqd9Gt/3RyP\nWQmvhcylpc/l6ej+yEb2H511nEhVqcjkLoRwSQihKzAQ/2afz0g3kZa4EB9Q8Tcz+1n2zmgi1u3N\nLHNARbp2eYhlTDgbjdi8Lv1rjmulk8GBjcTyMj6AYINoDq70eTsAw2mkVioP9+AjXHc1syvMbKm+\nXNFkr4UmFelk7oTo/omMfc/htaK/yjo20414mRxvZmfnGjRhZgPNrNnEO/jKDQ/iy5Jdk3muaNTu\n0ObOUaAv8ekyVo1q2vIWDSq5Hx+Qcq2ZdUzvM7N1gd8Xcj4zO9LMBuXYvhrwY/yLRuaI1+Zeh8Uy\nNHPEdDSy+lp89Pn9WaNwU1Gcu5nZZhmP6QrcTOP9D1v6XG4CZuOj2o/N3GFmewOH43/fqws8r0hl\naM08Kvg39tH4xJ
kNwOQmjm0DnA68i/fl+Aj4C9C5mWvsD3wc53wxulX3jebnwjo0ek024IMJ/oP3\nwXsCrzluAI7LOH5NfCqQBrzj9Sh84M8cvDnyOXLPw7ZXtH0uPifbzdFt7YxjzouOWYR/4N2DN19O\nA/5O0/Pc5ZwjLDqmNzA+Ou4bvEZiBN7f7f1o+wsFlms/fpjT7Msc+5/K2L9PI+fYMHovaMBr6h/H\np6H4D97xvgFvTs58TKqR8u0dlVVDdD8Sn2B2Dj4id0q070dZj0tvX2o+t2h/A7A4x/YxGa+BEdHf\n8uI8y65XxnU/iWJ9MHpt3B2dc6mYcsXKD3PmpZPGfwGP8sNr+s6sc/w4en0tAh6JXlc3A1tE+/vS\nzBx+jR2TsX1K9NqdG/0NRuKv4XR59cxxzhv54f/j0eg18AU+QOcecs9z1+LngtfWz4v2vxr9DZ/n\nh/+/43M8pkWvFd10K/etdQ+O3tSjf6qvm3kzuCo6/i7gGOByvEr9CcCaeNyhwIS4C0q36r0Bt+Cj\nN3Mmd9Ex/YG/4l8+ZuMTnL4XfXAeA6yQdfyaeFL3MV7b9g5eO9QBT2oWk2OyYryW67XoGg25jsPn\nm3s7+pD7DB8N2ROfrHYxSyd3ObfnuHYnfN6uZ/GkdV4U//PAMGD9FpTtlOjao3Ps+13GB2W3Js7R\nDTgfGIsPUpmDJwBPAWcDa2Qd31T59og+3D+NzjMB/xLaAa9JXEjGBMfRY9ITKRea3K2I1wD9L3ov\nK2hSY3xVjhvxpGdO9Nr7Hd7EmDOmXNvxaWCGA/+NXi9zo5geBQ4kx/srvrrGi1F5p1+HR0b7+jb3\nXBo7JnM73pf19/j/0Ty86fV6YJVGzpk+/sOM1+Y1eP/DRv+HW/Nc8C8XI6K/wTz8C8bdwJaNHN+i\n14puupX7ZiG0vH+xmfUNIUyJfh6P18L1y3HcesBb+KzkB2RsPwmv9j4shHBn1Ol5X/xb2kxgA3x5\nmFEhhGEtDlREapqZbYUnteNDCBvGHY+ISCm1qs9dOrHLQ7oz8/Cs7Tfh31jTa0cGvDPxJPxb2Ch8\nQk/1uRORJkX9I5dauN3M1sFryKDxSWtFRBKjVTV3S5yo6Zq7R/DpEDqHrOkmzOx5fP3BXCPcRETy\nYmZd8Br/KXhT4Ex8EMrGeJPf08BPw9LLs4mIJEq5Rsv2BL7KTuwiU4HumaMORURaYC5wCd4PeGN8\nBY118FHIpwE7KbETkVpQroSqM96ZOZd5GcfMLE84IpI0UeJ2XtxxiIjErVzJ3Rx8YtdcOuF97XIt\neN4sM6uEGedFRERE8hJCKPbylEsoV7PsNLzptX2Ofb3wJttFLT153EOOa+12wQUXxB5Drd1U5irz\nWripzFXmtXArh3Ild2PxDs2Zs/ynZywfBLxSpjhEREREEq1cyd1IvOn1tKztQ/DlgkaUKQ4RERGR\nRGtVnzszO4If1rtcGWhvZr+Lfp8SQvgXQAhhvJldC5xkZmPwBcMHACcDqRDCHa2Jo76+nrq6Ourq\n6lpzGsmTyrn8VOblpzIvP5V5+anMyyeVSpFKpcpyrdauUPEUsF30a/pE6U6CqRDCDhnHtsFr7o7D\nl4T5Eq/RGxpCaNFgiui8oVxt2CIiIiKtYWaEEg+oKNokxnFRciciIiLVohzJXbn63ImIiIhIGSQi\nuauvry9bO7aIiIhIoVKpFPX19WW5lpplRURERMpEzbIiIiIiUhAldyIiIiIJouROREREJEESkdxp\nQIWIiIhUMg2oKIAGVIiIiEi10IAKERERESmIkjsRERGRBFFyJyIiIpIgSu5EREREEiQRyZ1Gy4qI\niEgl02jZAmi0rIiIiFQLjZYVERERkYIouRMRERFJECV3IiIiIgmi5E5EREQkQRKR3Gm0rIiIiFQy\njZYtgEbLioiISLXQaFkRERERKYiSOxEREZEEUXInIiIikiBK7kREREQSRMmdiIiI
SIIouRMRERFJ\nkEQkd5rnTkRERCqZ5rkrgOa5ExERkWqhee5EREREpCBK7kREREQSRMmdiIiISIIouRMRERFJECV3\nIiIiIgmi5E5EREQkQZTciYiIiCSIkjsRERGRBFFyJyIiIpIgiUjutPyYiIiIVDItP1YALT8mIiIi\n1ULLj4mIiIhIQZTciYiIiCSIkjsRERGRBFFyJyIiIpIgSu5EREREEkTJnYiIiEiCKLkTERERSRAl\ndyIiIiIJouROREREJEGU3ImIiIgkiJI7ERFg0SI4/XTo0gUGD4bx4+OOSESkZZTciYgAv/oVvPMO\nfPghnHwy7LwzfPRR3FGJiBTOQghxx9AqZhaq/TmISLwefhhOPBHeeAO6dvVtF14Ir78Od98db2wi\nkixmRgjBSnmNRNTc1dfXk0ql4g5DRKpQCHD++XDFFT8kdgBnnQWvvQYvvhhfbCKSHKlUivr6+rJc\nSzV3IlLTHn0UzjgD3nwT2mR93b36anj+eRg5Mp7YRCR5ylFzp+RORGraAQfAT38Kxx+/9L5Zs6BP\nH++L96MflT82EUkeNcuKiJTQN994zd2BB+be37Ur7LknjB5d3rhERFpDyZ2I1Ky774addoIVVmj8\nmIMPhn//u3wxiYi0lpI7EalZ990H++zT9DE77eTNsp9+Wp6YRERaS8mdiNSkuXMhlYLddmv6uPbt\nYccdvflWRKQaKLkTkZqUSsGgQbDiis0fu9tu8NBDJQ9JRKQolNyJSE1KpbxGLh+77gqPPQYNDSUN\nSUSkKJTciUhNSqVgu+3yO7ZXL+jWDd59t6QhiYgUhZI7Eak5s2b5IInNN8//MVtt5RMai4hUOiV3\nIlJznn8eNtkEOnXK/zFbbw3PPVe6mEREikXJnYjUnEKaZNO22krJnYhUh4pK7sysg5ndZGYTzWym\nmb1nZifFHZeIJMsLL3hNXCEGDPAVLTTfnYhUuopK7oB2wKfATiGE5YADgd+Z2QHxhiUiSbF4Mbz2\nmjfLFqJNG9hyS3jxxdLEJSJSLBWV3IUQ5oQQhoYQJkW/vwHcBxT4HVtEJLcJE6BHDx/9WqiNN4Zx\n44ofk4hIMVVUcpfNzNoD2wJvxB2LiCTDK68UXmuXttFGXusnIlLJKjq5A/4KzAD+GXcgIpIML78M\nm27asscOHqyaOxGpfK1K7szsXDMbbWaTzKzBzCY3cWwbMzvdzN41s7lm9pGZ/cXMOjdy/BXA5sBu\nIYRFrYlTRCStNcld796wYIEGVYhIZWttzd1FQB3wAfANEJo49krgcmA8cBIwGjgFuN/MLPNAMxsO\n7AjsGEKY3soYRUQAT8zGj/fm1ZYwU9OsiFS+dq18fL8QwhQAMxsPNFYLtx5wMjAmhHBAxvbJwNXA\nwcCd0barge2B7UMIX7cyPhGR7733Hqy+Oiy7bMvPMXiwJ3e77168uEREiqlVNXfpxC4Ph0T3w7O2\n3wTMAQ4HMLM+eK3emsBkM5sV3R5oTZwiIgBvvQUbbNC6c2y0kfrdiUhlK9eAik2BxcDYzI0hhPn4\nSNhNo9//F0JoE0LoHELomnHbo0xxikiCjR/f+uRugw3g7beLE4+ISCmUK7nrCXwVQliYY99UoLuZ\ntbaJWESkSW+9Beuv37pzrLUWTJkC8+cXJSQRkaIrV3LXGWjsrXBexjEiIiVTjJq7jh1hjTXg/feL\nE5OISLGVq7ZsDtC9kX2d8FG2c1p68vr6+u9/rquro66urqWnEpGEmjULvvgC+vVr/bkGDoR33ml9\noigiyZdKpUilUmW9poXQ1OwlBZwoGi0bQljqrdPMHgF2iPYvzNr3PNA/hLBqC68bivUcRCS5XnwR\nTj7ZV6horaFDfVqUYcNafy4RqS1mRgjBmj+y5crVLDsWaItPSvw9M+sEDAKK8HYrItK48eNb398u\nbeBADaoQkcpVruRuJN70elrW9iHAMsCIMsUh
IjWqGNOgpKWbZUVEKlGr+tyZ2RFAn+jXlYH2Zva7\n6PcpIYR/AYQQxpvZtcBJZjYGeAgYgE9snAoh3NGaOOrr69XXTkSaNH487Llncc61zjowaZKveNGh\nQ3HOKSLJVs6+d63qc2dmTwHbRb+mT5RuR06FEHbIOLYNXnN3HNAX+BKv0RsaQmjxYAr1uRORfKy6\nqk8+3KtXcc631lpw330wYEBxzicitaEcfe6KNqAiLkruRKQ5M2ZAz54we7YPhCiG3XeHE06Avfcu\nzvlEpDYkaUCFiEhsJk6E/v2Ll9iB19x98EHxziciUiyJSO7q6+vLPoeMiFSPiRNhzTWLe04ldyJS\niFQqtcS8vKWkZlkRSbyLL4ZvvoFLLy3eOR95BC67DB5/vHjnFJHkU7OsiEgRqOZORGqJkjsRSbwP\nP/Q+d8W0+urw+ecwb17zx4qIlJOSOxFJvFIkd+3aQZ8+XisoIlJJEpHcaUCFiDRm7lz46itYbbXi\nn1tNsyKSLw2oKIAGVIhIU95+G/bbD959t/jnPv10nz/vt78t/rlFJJk0oEJEpJVK0SSbppo7EalE\nSu5EJNFKMVI2TcmdiFQiJXcikmiquRORWpOI5E4DKkSkMaVM7nr3hi+/1HQoItI8DagogAZUiEhT\n1lwTHnoI1l67NOfv3x8eeADWWac05xeRZNGAChGRVliwAD75BPr2Ld01+vaFKVNKd34RkUIpuROR\nxPrf/6BXL+jQoXTXWGMNmDy5dOcXESmUkjsRSaxSjpRNU82diFQaJXciklilHEyRppo7Eak0Su5E\nJLE+/FA1dyJSexKR3GkqFBHJZeJE1dyJSGXQVCgF0FQoItKYAQNg9GhYf/3SXaOhAZZdFr76yu9F\nRJqiqVBERFpo8WJvLu3Xr7TXadMG+vRR06yIVA4ldyKSSFOnwoorQufOpb/WGmsouRORyqHkTkQS\nqRwjZdP69lW/OxGpHEruRCSRyjHHXZpq7kSkkii5E5FEUs2diNQqJXcikkjlTO5UcycilSQRyZ3m\nuRORbOVsllXNnYg0R/PcFUDz3IlIthCga1f45BPo1q1815s6FZZfvvTXE5HqpXnuRERa4PPPoVOn\n8iR2AGZahkxEKoeSOxFJnHIsO5ZNy5CJSKVQciciiVPOwRRpffrA//5X3muKiOSi5E5EEufDD8s3\nmCJNyZ2IVAoldyKSOHE0y/btq+RORCqDkjsRSZy4mmU1oEJEKoGSOxFJHDXLikgtU3InIokyfTos\nWgQrr1ze666yCsyZA7Nnl/e6IiLZEpHcaYUKEUlL97ezkk4RujQzWH111d6JSG5aoaIAWqFCRDLd\neSfcfTeMHl3+a++yC5x6Kuy+e/mvLSLVQStUiIgUKI6RsmkaMSsilUDJnYgkShwjZdM0YlZEKoGS\nOxFJlIkTyz9SNk0jZkWkEii5E5FEibPmTs2yIlIJlNyJSGLMng3ffgs9e8ZzfTXLikglUHInIokx\naRL06wdtYnpn69HD59mbNy+e64uIgJI7EUmQOJtkAdq2hdVWg48/ji8GEREldyKSGHEsO5ZNTbMi\nEjcldyKSGHHOcZemEbMiEjcldyKSGHE3y4JGzIpI/JTciUhiVEqzrJI7EYmTkjsRSYR58+Dzzz25\nipP63IlI3JTciUgiTJzoTaLt2sUbh2ruRCRuiUju6uvrSaVScYchIjF6/31Ya624o/CpUD77DBYt\nijsSEakkqVSK+vr6slzLQghluVCpmFmo9ucgIq136aXeLHv55XFHAr17w7PPek2iiEgmMyOEYKW8\nRiJq7kREKqXmDtQ0KyLxUnInIonwwQew9tpxR+E0HYqIxEnJnYgkQqXV3GnErIjERcmdiFS9WbNg\nxgzo1SvuSJyaZUUkTkruRKTqpVemaFMh72hqlhWROFXIW6GISMtVUpMsqFlWROKl5E5Eql4lDaYA\nWH11+OQT
aGiIOxIRqUVK7kSk6lVazd0yy0C3bj6ZsYhIuSm5E5Gq9/77lVVzB2qaFZH4KLkTkaoW\nArzzDqy3XtyRLEkjZkUkLkruRKSqffQRdOkCK6wQdyRL0ohZEYmLkjsRqWpvv115tXagZlkRiU/F\nJXdmdqCZPWdms8xsctzxiEhlq+TkTjV3IhKHikvugOnA1cD5cQciIpVPyZ2IyJIqLrkLITweQhgF\nfBR3LCJS+So1uevb15tlNdediJRbxSV3IiL5amiACRNg4MC4I1la166w3HIwbVrckYhIrVFyJyJV\na8oUnyy4W7e4I8ltrbV89QwRkXJqVXJnZuea2Wgzm2RmDU0NgDCzNmZ2upm9a2ZzzewjM/uLmXVu\nTQwiUrvGjYPBg+OOonFK7kQkDq2tubsIqAM+AL4BQhPHXglcDowHTgJGA6cA95uZtTIOEalBr74K\nG28cdxSNU3InInFobXLXL4SwcghhF+DTxg4ys/WAk4ExIYT9Qwh/DyGcCZwBbA8cnHFsGzPrBLT3\nX62jmXVsZZwikkCVntytvbaSOxEpv1YldyGEKXkeekh0Pzxr+03AHODwjG1HRttGAr2BucCElkcp\nIkkUQuUnd6q5E5E4lGtAxabAYmBs5sYQwnzgjWh/etutIYQ20a1tdN+vTHGKSJWYMgU6dYIePeKO\npHH9+8OkSbB4cdyRiEgtKVdy1xP4KoSwMMe+qUB3M2tXplhEJAEqvdYOoHNnWGkl+PjjuCMRkVpS\nroSqMzC/kX3zMo6Z2ZKT19fXf/9zXV0ddXV1LTmNiFSRV16p/OQOfmia7ds37khEJA6pVIpUKlXW\na1oITQ1wLeBEZuOBzrmaUM3sLaB7CGGpBhQzGwXsB3QMISxqwXVDsZ6DiFSPLbeEP/wBdtwx7kia\nduKJsO66cMopcUciIpXAzAghlHSWkHI1y07Dm17b59jXC2+yLTixE5Ha9N138MYbsMUWcUfSvPXX\nh7feijsKEakl5UruxgJtgc0zN0ZTngwCXilTHCKSAC++CBtt5H3aKt0GGyi5E5HyKldyNxKf4Pi0\nrO1DgGWAEa05eX19fdnbs0UkPk8/DdttF3cU+Vl/fXj7bV8HV0RqVyqVWmKMQCm1qs+dmR0B9Il+\nPRmfePiK6PcpIYR/ZRx7Nb4yxT3AQ8CA6DHPhRB2aEUM6nMnUmO22QZ+/3vYeee4I8lP796ekPbT\npE4iNa8cfe5am9w9BaS/P6dPlA44lZm0mVkbvObuOKAv8CVeozc0hDCnFTEouROpIbNmQc+e8Omn\n0KVL3NHw2U+EAAAgAElEQVTkZ/fd4YQTYO+9445EROJWjuSuVVOhhBC2L+DYBrxW74rmjhURacyj\nj/pI2WpJ7OCHfndK7kSkHMrV505EpCj+8x/Ya6+4oyiMBlWISDklIrnTgAqR2rBwITzwAOy5Z9yR\nFGajjXxFDRGpXVUzoKISqM+dSO146CGfuPiFF+KOpDCLF8MKK8Dkyb4cmYjUriRNYiwi0mojRsCh\nh8YdReHatoVNNoGxY+OORERqgZI7EakKX3/tTbIHHRR3JC2z+ebw3//GHYWI1IJEJHfqcyeSfDff\nDD/7Gay8ctyRtMwWW1Rfc7KIFI/63BVAfe5Ekm/RIp8A+J57YOON446mZb791icz/uor6Ngx7mhE\nJC7qcyciAtx5J/TtW72JHUC3brDuumqaFZHSU3InIhVt/nwYOhQuuijuSFpvxx3hiSfijkJEkk7J\nnYhUtOuvh/XW8/Vkq92uu/okzCIipaQ+dyJSsaZOhUGD4OmnYeDAuKNpvUWL4Ec/8gmN+/SJOxoR\niYP63OVJo2VFkicEOPFE+PWvk5HYAbRr50un3XNP3JGISLlptGwBVHMnkkzDh8Ntt8FLLyVrdOlj\nj8HZZ8O4cXFHIiJxUM2diNSkJ56ASy7xGq4kJXbggyqmT1dyJyKlo+RORC
rK5Mlw2GFwxx0+/UnS\ntGkDxx0H11wTdyQiklRqlhWRivHdd7DVVnD00XDqqXFHUzrffAP9+3vtnQZWiNSWcjTLKrkTkYoQ\nAhxyiDfD3norWEnf+uJ3/vnw8cfwz3/GHYmIlJOSuzwouRNJhssug1Gj4JlnYJll4o6m9GbP9hUr\n/v1v2HrruKMRkXLRgIo8aSoUker2yCNw5ZVw9921kdgBdOkCl18OJ5wAc+fGHY2IlJqmQimAau5E\nqtvkyfCTn8BddyVjFYpChOCDR7p0gRtvjDsaESkH1dyJSKLNmwf77w/nnVd7iR14v8IbbvAVOK67\nLu5oRCQpVHMnIrE54QSf823kyOQPoGjKpEne7+6vf4V99407GhEppXLU3LUr5clFRBpz++3w1FPw\n8su1ndgB9OsHDzwAu+zio4X32CPuiESkmqnmTkTKbuJE72f35JOwwQZxR1M5/vtf2Htv+NvfYL/9\n4o5GREpBNXcikjiLF/skxeeeq8Qu2+ab+8jh3XaDGTPgl7+MOyIRqUZK7kSkrK66yu+TvAJFawwa\n5M3Vu+8OU6bAsGFqthaRwiRitKzmuROpDu+8Axdf7CtQtG0bdzSVa9114aWXvBbvyCNhwYK4IxKR\n1tI8dwVQnzuR6rBwIWy5JRx7LBx/fNzRVIc5c+Dww30t2rvugpVWijsiEWktzXMnIolxySWw4opw\n3HFxR1I9OneG0aNh00399vrrcUckItVANXciUnKvvebTfIwbB6utFnc01WnkSDj5ZBg+HA49NO5o\nRKSlylFzp+ROREpq/nyvdfrNb7z/mLTcW2/BPvvAXnvBpZdC+/ZxRyQihVKzrIhUvQsvhDXWgCOO\niDuS6rfBBj7p8/vvw7bb+rq8IiLZlNyJSMmMHQs33+zrp2o6j+JYYQW4/3448ECfF++OO+KOSEQq\njZplRaQk5s6FwYOhvh4OOijuaJLptdfgkENgs83g2muha9e4IxKR5qhZVkSq1hlnwI9/rMSulDba\nCF59FTp18smPn38+7ohEpBIouRORorvzTnj8cW+OldJadlm48Ua44grYf384/3xNeixS6xKR3GmF\nCpHK8d57cMopPj/b8svHHU3t+NnPfB68N96ALbaACRPijkhEMmmFigKoz51I5Zg+HbbayptkhwyJ\nO5raFALcdBOcdx4MHQonnQRtEvE1XiQZNM9dHpTciVSG+fNh5519Tru//CXuaOSDD3z6meWWg1tu\ngV694o5IREADKkSkSixY4KM2V1nFJ9eV+K21Fjz3HGy9tY9aHjUq7ohEpFxUcycirbJgARx8MCxa\n5Ivbd+gQd0SSbexYOPxwnxfv2mu9Nk9E4qGaOxGpaN98A7vvDg0NSuwq2Wab+Zx4yyzjCd7778cd\nkYiUkpI7EWmRiRNhyy19SawxY5TYVbr0lCmnn+5NtQ8+GHdEIlIqSu5EpGD//rdPt3HKKXDlldC2\nbdwRSb6OOw7uucdHM19yiY+uFZFkUZ87Ecnb11/7NCcvveRrmm68cdwRSUt98gnssw+ss46v/9up\nU9wRidQG9bkTkYrQ0OAJwMCB3hl/3DgldtVutdXg6adh4ULYfnv47LO4IxKRYlFyJyKNWrjQm2A3\n3BD+8Q946CG45hrvvyXVr3Nn//vuuqsPtHj99bgjEpFiULOsiCxhxgx44QV49FFfI3bttX21g112\nAStpQ4LEafRoOPFEX93i5z+POxqR5CpHs2y7Up5cROL38cfwwAPw4oswaRJ88YU3szY0eLLWtu0P\ny1N9+SXMnetTZ2y/PTzzjCd3knwHHABrrOH98CZMgHPOUTIvUq1UcyeSUK+95muLvvAC7LEHbLst\n9O8Pq64K7dr5B3cIPyR6DQ2+wkT37vpQr2VTp3rNnQZaiJSG1pbNg5I7kSV9+63PZfbww/C738HR\nR3vfKpF8zZnjr5spU+Cf//RET0SKQ6
Nl81RfX08qlYo7DJHYpVLw4x/7gIf334df/1qJnRQuPdDi\niCNgq63gj3/0hE9EWi6VSlFfX1+Wa6nmTiQBFi+GYcPg73/3prTddos7IkmKyZPht7/1uQ3PPRd+\n8Qvo0iXuqESql5pl86DkTmrdp5/CoYf6wIgRI7xPnUixjR3rK1o88wwcdZTXCq+xRtxRiVQfNcuK\nSJMeeMAnE66rg0ceUWInpbPZZnD33fDKK/77ppvCvvv6RMj6fi1SWVRzJ1KFJk3yqSrGjfPF4HfY\nIe6IpNbMng233+5rC6++OlxxhU92LSJNU82diAD+Qfrmm97suu++XosycCC89ZYSO4lHly7wq1/B\n22/73Hg//an3+1y0KO7IREQ1dyIVJARfAuqZZ/z+vfdg4kSYNcv7N62zDuy5J+y/v6/xKlIppk2D\nI4/0ORTvukuDLkQaowEVeVByJ0mweLHXyl18MSxY4Et9DRoEAwZAv37Qo8cPq0iIVKpFi+CEE/yL\nyUMPwcorxx2RSOVRcpcHJXdS7T780Gs8zOAPf/Blv7RChFSrEHzKlMceg6eeUg2zSDb1uRNJuCef\nhC23hIMPhmef9f5zSuykmpl5DfSmm/oyZvPnxx2RSO1RzZ1ITEaOhFNOgVGjYLvt4o5GpLgWL4YD\nDoAVV4SbbtKXFpE01dyJJNT998Opp8Ljjyuxk2Rq29bXpX3pJbj++rijEaktqrkTKbNnn4X99oP/\n/MenNBFJsg8/9K4HY8bANtvEHY1I/FRzJ5Iwb7zh05jccYcSO6kN/ft7Dd7BB/t0KSJSehWV3JlZ\nOzO7ysy+NrNvzOxmM+sYd1wixTBxIuy+O/z1rz7hq0it2HVXOPFE/2KzYEHc0YgkX0Uld8B5QB2w\nPrAWMBC4NM6ARIph2jTYeWcYOtQ7mYvUmnPPhVVWgdNOizsSkeSrqD53ZvYR8JsQwqjo952B0UC3\nxjrWqc+dVLqvv/ZBE4cd5h9wIrVq5kyfIuWcc+Doo+OORiQeNTWJsZl1A6YD64YQ3o+2rQx8DqwZ\nQpjcyOOU3EnFmjHDa+y22w7+/GdNByEyYQJsuy383//5QAuRWlNrAyq6RvffZmz7NmufSNX4/HOo\nq/OBE0rsRNyAAXD77bDPPvDmm3FHI5JMLU7uzOxcMxttZpPMrMHMctasRce2MbPTzexdM5trZh+Z\n2V/MrHPGYbOi++UztnXL2idSFd58E7baCn72M7j6aiV2Ipl23dX/L3bd1dehFZHiak3N3UX44IcP\ngG+AptpGrwQuB8YDJ+H96E4B7jfzj70QwrfAx8BGGY8bjCd2U1oRp0jZNDTAddfBjjvCBRdAfb0S\nO5FcDjoIhg+HnXaCBx+MOxqRZGnXisf2CyFMATCz8UDnXAeZ2XrAycCYEMIBGdsnA1cDBwN3Rptv\nBs41s2eBRUA9cIs61UlLzZoFN9wAd98Nkyb5tnXWge23hwMPhIEDi3OdEHy1ibPPhk6dfKLiddct\nzrlFkurAA2G11XyKlAMPhIsugmWXjTsqkerX4pq7dGKXh0Oi++FZ228C5gCHZ2z7E/AM8DZeIzge\nOLulMUpte/xxWH99ePVVuPBCGDfOf/7d73ygw047wcYbwzXXwFdftewa770Hl10G660Hp58OZ50F\nzz+vxE4kX1tuCW+95aPK11sPbr4ZFi6MOyqR6laU0bLpmrsQQr8c+x4Bdoj2L8za9zywVghhlVZc\nWxV7spQRI+DMM73j9k475T5m8WJ48km49VZ44AGfWPjww72v3MorL3lsCDB9OnzwgSeJL70EL7wA\nc+fCnnvCoYf6CEA1wYq03HPPeVeGSZP8y9LRR0OXLnFHJVJcVTMVSjPJ3VtA9xBCjxz7RgH7Ax1C\nCItaeG0ld7KEe++FX/8aHnss/2bXGTNg1Ci/vfwyLLMMdO8OHTvCd9/BZ595gte/PwwaBJtvDj/5\nid
cMKqETKa7nn4crr4SnnoJf/hJOOgn69Ik7KpHiKEdy15o+d/nqDMxvZN+8jGNmtvQC9fX13/9c\nV1dHXV1dS08lVe7NN2HIEO+gXUh/uuWX98cNGeKDIqZN85q6+fO9D9Aqq8BKKymREymHrbby2+TJ\nvlzf4ME+SOm002CLLfR/KNUllUqRSqXKes1KqLnbD+iomjtprfnzvQ/dWWfBkUfGHY2IFMvMmd59\n4qqrvEb9tNN8EEb79nFHJlK4pExiPA3obma5/g17AV+1NLETyTR0qI+EPeKIuCMRkWJabjk45RR4\n/3047zwfAd+vH1xyidewi8iSypHcjQXaAptnbjSzTsAg4JUyxCAJN3Ys/POfPsecmmxEkqltW58Y\nPJWC++7zpczWXBN+/nPvo/fYYz4YQ6NtpdaVo8/dSOA84DTguYztQ4BlgBGtvUB9fb362tWwEOCM\nM+BPf/K+cSKSfBttBLfdBl9+CU88Ac88A/ffDxMnwqefeh/ZXr2gZ0+/793bB0FtsYUPmBIpt3L2\nvWtxnzszOwJIj186GWgPXBH9PiWE8K+MY6/GV6a4B3gIGBA95rkQwg4tC/37c6vPXY27915vkn3t\nNf9mLyK1bdEiX9t56tQfblOm+PRF48fDLrt4v9w99oA2lbTCutSEip4KxcyeAraLfk2fJB1sKjNp\nM7M2eM3dcUBf4Eu8Rm9oCGFOiwL44dxK7mrYwoU+HclVV/k6lSIiTZk+HcaMgRtv9IEaZ54JRx0F\nHTrEHZnUiopO7iqFkrvadt11vrTYo4+qr52I5C8EePppuPhib8q9+GIfgav3ESk1JXd5MLNwwQUX\nqM9dDZo1C9Ze2+e022ijuKMRkWr1+OPw29/6pOXDh3vfPJFiS/e5GzZsmJK75qjmrnYNHeqTnN5+\ne9yRiEi1a2jw95LzzoPtt/dpVlZbLe6oJImSMs+dSNFNmwbXXgt//GPckYhIErRpA7/4Bbz3ni91\n9uMfw4UXwpxW9QoXiYeSO6lKF1wAxxyj9SZFpLi6dIGLLoJXX/WRtQMGwL//7X30RKqFmmWl6rz9\ntjebvPcerLBC3NGISJI984wvd7bMMlBf72vcavoUaY1yNMuWYxLjktMkxrXlnHPg3HOV2IlI6W27\nLbz8svfHO+ssmDED9twTttnGWw5WWMHXtf7uOx/kNXOm39q0gVVX9amaevWK+1lIJaiKSYwrhWru\naksqBUcfDe++6yPbRETKJQR4/XVf5uyFF3xy5OnToVMnWHZZXwM3fVu0CD77zI9fdVU49lgYMsSb\nfaW2aSqUPCi5qx0NDbD55r7U2CGHxB2NiEjzGho8EbzmGnjuOfjzn+GwwzSfXi1TcpcHJXe14447\nfHHw//5XfV5EpPqMHQu//CUMHAg33KCuJbVKU6GIRObO9X52l1+uxE5EqtNmm3n/vVVX9VaI99+P\nOyJJKn1MSlUYPhw22cQ7N4uIVKtllvEm2rPO8kEZTzwRd0SSRIlI7urr68s2AkXK7/PPvcbu0kvj\njkREpDiOPdbnzzv0ULj++rijkXJIpVLU19eX5VrqcycV75hjoFs3T/BERJLkgw9gr71gp528T3G7\nRExQJk3RgIo8KLlLtqefhsMP94mLl1su7mhERIrv22/hoIN8ZO2IEbDKKnFHJKWkARVS0+bNg+OO\ng7/+VYmdiCRXt27wwAOw6aYwaBDcd1/cEUm1U82dVKyzzoKJE2HMmLgjEREpj2ef9a4offvCxRfD\nxhvHHZEUm2rupGotWACffALffNOyxz/4INx5p88FJSJSK7bZxruh/PznfttiC7jxRl/tQiRfiUju\nNFq2Mixe7P1Ftt0Wll/e53Hq0wfWWANOPdU7Dudj/HhfYmzECOjevbQxi4hUmvbt4cQTYfJkX0v7\nySdhwABP9C6+OP/3UqksGi1bADXLVoa33oKjjvI5nM48E3bd1X8O
ASZM8ETtpptgq63g97+HwYNz\nn2fCBNh5Z5/2REuMiYi4BQt8gNm998Ldd/tEyIce6ite6EtwddFo2TwouYvfk0/CwQf7molHHdX4\nmolz5sDNN3viNmiQJ4HbbOND/+fMgVtugWHDfMqTI44o61MQEakaixfD88/DrbfCPffAPvv4mtvr\nrx93ZJIPJXd5UHIXrzvvhNNOg1GjYLvt8nvMvHmeyN10kw+YWHlln6i4rg7+9CfYYIOShiwikhhf\nfeV98q6+2t9DL7jAm3Clcim5y4OSu/hcfrkvC/bggy1PyKZP9zenHj2ga9fixiciUitmz4Zrr/X3\n5Z12gqFDYZ114o5KclFylwcld+XX0AC/+Q088gg8/DD07h13RCIiAjBrltfiDR8Oe+7pSd4aa8Qd\nlWTSVChScebP9068r7wCzz2nxE5EpJJ07Qrnn+8jaldfHTbZBH71K5+aSmqHkjvJ24wZPgp24UJ4\n9FFYYYW4IxIRkVy6dfMBau+95yv8bLih94/+/PO4I5NySERyp3nuSm/qVB/Zut56PniiU6e4IxIR\nkeZ07+4zGbzzjk9NNXCgz5339ddxR1Z7NM9dAdTnrvTGj4c99vCq/bPPbnyqExERqWwffwwXXQR3\n3QUnnQSnn+6Tzkv5qM+dxO7JJ2GHHXyKknPOUWInIlLNeveG66+HsWNhyhTo399XvZg9O+7IpJiU\n3Emjbr/dV4kYNQoOOyzuaEREpFj69fNJkJ99Ft5805O8K6+E776LOzIpBiV3spQQ4A9/8CH0Tz3l\nE2OKiEjyrLuuT0b/6KM+A0KfPt795uOP445MWkPJnSxhwQI45hhfv/DFF73zrYiIJNuGG8KYMd5c\nu2CBLxF58MHw0ktxRyYtoeROvvf5596/bvp0X6D6Rz+KOyIRESmnfv28eXbyZNhiC5/XdJtt4I03\n4o5MCqHkTgCflHjTTeGnP4W774YuXeKOSERE4rLccnDqqT4Z8hFH+JJml1/u3Xak8mkqlCoWgv/j\nvf22z1m0zDLeKXbQIOjYMb9zLF4Ml17q39RuuAH22ae0MYuISPX53/9gv/28+famm6Bt27gjql5a\nWzYPtZjcffaZJ2K33earRWy0Eay8so9yeu89mDgRttsO9t4b9tord/NqCD7NyVln+RxHt97qS9WI\niIjk8t13/pmy+upwyy2aGqullNzloVaSuxC8Y+u118IDD8BBB8Hxx3stXfY/2NdfwyOPwH33+f2A\nAZ7s9e3r53n/fXjoIf952DA48ED9k4qISPO++877Zu+9t69hK4VTcpcHMwsXXHABdXV11CVszo4Q\n4MMP4cEH4R//8H+qE07w0az5rus6fz6kUvDyy16tDrDmmrDttt5ZVkmdiIgUYto076P997/7euOS\nn1QqRSqVYtiwYUrumpOEmrvFi2HCBF/mK92sOmmSJ3bt2nlH1sMPh+23hzYaAiMiIjF78kk48kh4\n6638KxvEqeYuD9Wc3L3zDgwfDvfcAyuuCBts4BNKrrmm3/r1g169VLsmIiKV5+STYeZM7/8t+VNy\nl4dqTO5mz4bzzoORI+HXv4ajjtJgBhERqS6zZ3uFxMiRsNVWcUdTPcqR3LUr5cklt1tugVmzvCl2\nxRXjjkZERKRwXbr4VFqnnOIrW2h6lMqhmrsYhKCmVhERqX4h+AoWRx0Fxx5b2mstWuQzPdx8sw8I\nPOec0l6vVNQsm4dqTO5ERESS4tVXYc89fZqtrl2Lf/65c+H66+GKK6B3bxgyBPbd1+dorUblSO40\n9lJERERabOONfenKyy4r7nlD8P58a60FzzwD998PL7wARx9dvYlduajmTkRERFrlo498taQ33oDV\nVmv9+b74An75S5gyxVdkStKADdXciYiISMVbfXVfNen3v2/9uV54wWsDN9gAxo1LVmJXLqq5ExER\nkVabORPWXhseftiXxmyJW2/1
Nc///ndfxzaJNKAiD0ruREREKsN118GYMfDYY4XPCnHVVT5o4tFH\nYZ11ShNfJVCzrIiIiFSNIUNg6lSfsiRfIcCwYXDttT5wIsmJXbmo5k5ERESK5j//gbPP9sEV7ZpZ\nKiEEOOMMX6v20Udh1VXLE2OcVHMnIiIiVWWPPaBnT7jkkqaPW7QIjjkGXnoJUqnaSOzKJRHLj9XX\n11NXV0ddXV3coYiIiNQ0Mx8YsckmsPXWkOujee5cOPhgmD8fHn8cll223FGWXyqVIpVKleVaapYV\nERGRonvsMTj8cLjnHthyyx+2v/suHHQQbLihj4rt0CG+GOOg0bJ5UHInIiJSmR580FeU2G03T+bG\njfPBFn/6Exx3XG2us67kLg9K7kRERCrXl1/CiBEwebLPg3fQQdC9e9xRxUfJXR6U3ImIiEi10GhZ\nERERESmIkjsRERGRBFFyJyIiIpIgSu5EREREEkTJnYiIiEiCKLkTERERSRAldyIiIiIJouRORERE\nJEEqLrkzswPN7Dkzm2Vmk+OOR0RERKSaVFxyB0wHrgbOjzsQERERkWrTLu4AsoUQHgcws5/HHYuI\niIhItanEmjsRERERaSEldyIiIiIJkndyZ2bnmtloM5tkZg1NDXYwszZmdrqZvWtmc83sIzP7i5l1\nzjjmsGjQxCwzm9naJyIiIiIihdXcXQTUAR8A3wChiWOvBC4HxgMnAaOBU4D7zcwAQggjQghdo9ty\nLYhdRERERLIUMqCiXwhhCoCZjQc65zrIzNYDTgbGhBAOyNg+GR8FezBwZ2MXMbM2QAegvf9qHQFC\nCPMLiFVERESkJuVdc5dO7PJwSHQ/PGv7TcAc4PBmHn9kdNxIoDcwF5iQ57WlDFKpVNwh1ByVefmp\nzMtPZV5+KvNkKsWAik2BxcDYzI1Rzdsb0f5GhRBuDSG0iW5to/t+JYhTWkhvBuWnMi8/lXn5qczL\nT2WeTKVI7noCX4UQFubYNxXobmYVN7+eiIiISBKUIrnrDDTWP25exjEiIiIiUmQWQlODXht5UDSg\nIldzqZm9BXQPIfTIsW8UsB/QMYSwqAXx5oql8CcgIiIiEpMQgpXy/KVoHp0GrGtm7XM0zfbCm2yL\nkthB6QtIREREpJqUoll2LNAW2Dxzo5l1AgYBr5TgmiIiIiJCaZK7kfgEx6dlbR8CLAOMKME1RURE\nRIQCmmXN7AigT/TrykB7M/td9PuUEMK/AEII483sWuAkMxsDPAQMwCc2ToUQ7iha9CIiIiKyhEJq\n7n4JXBjdugPLZ/z+y6xjTwN+A6wH/BU4EF+dYs98LmRmR5rZa2Y2x8w+M7ObzKx7AbFiZpub2eNm\nNtPMZpjZQ2b24yaOX83MbozWwZ1nZp+a2YNmNqCQ61arOMo843E9zOybaM3iM1v+LKpPOcvdzAZH\nazyPM7Pp0W2smf0qSdMT5bO2dR7n2N3MXjCz2Wb2tZmNMrO+jRy7vJldY2ZTo+uNN7MTivV8qkE5\ny9zMtjOza83srej1/oWZPWdmBxfzOVWycr/Gsx63oZktjN6v92vN86g2cZS7mQ00szuinGSemX1s\nZneb2SpNXiiEUFE34HSgAXgSOBYYBszC16ntnOc5foJPu/IBcCqebH4IzATWz3H8RsDXwCRgKHAU\ncCZwB7BN3GWSxDLPeuxd0XENwBlxl0dSyx34N/AlcCNwPHAiXrPeADwcd3kUsVyvip7TXcAx+DrX\nC4AniGYIaObx+0aPfxU4ATgH+Ayfp7NH1rEd8H7GC4C/RNcbEz3+grjLIqFl/hLwUXTNY6PX/IvR\n42+MuyySVt5Zj2sD/Dfj/XrfuMsiyeUO7IKv0vUmcDaem5wN3AOs2eS14i6srCfSHfgu+ue1jO17\nRgVybp7nGQt8m1lY+OTKM4BHso7thH8wvgp0ibsMaqHMsx63N7AIr+mtmeQuptf6lkCHHOe4Pb
rm\nHnGXSxHKdb3ouYzO2n5StP2QZh7fPnqjnUxGgg38OHqd3pB1/InReX+dtf0ufL7P1eMukwSW+TZk\nfZACBqSi660Xd5kkqbyzHnsq/gX099RYchfD63wVvNLpAaBtwfHGXWBZT+bYqJAOy7HvQ+DtPM7R\nPzrHTTn23YwvjbZqxrYjMz/YgI74PHyxl0dSyzxjX1fgY7zJfjtqK7mLrdxzHLtXdJ6z4i6XIpTr\nH6PnslXW9o7AbOCBZh7/0+jx5+fY9zieSLfL2PZc9GHXIevYraPz/DbuMklImTf74Yb3624ADoy7\nTJJY3vha7zOBM/AapFpL7sr93jKUjC8r+OIP7fONtxSjZVsjve7sizn2/RefP6+5tu3mzmHA4Ixt\nu0f3M8zsGWAOMDfql7RzfmFXtTjKPO3iaN/50X0tibPcs60W3X+ex7GVrlVrW9N8mS4HrA3e/wYv\n39dCCAuyjn0ZnzVgk0KCr1JlK/NmJOl13JS4yvs6YCIwvJBgE6Tc5b473gKzgpm9jieQc83sGTNr\n9lBmQ24AAAXfSURBVH2l0pK7nvgb4tQc+6biH1Y98zhH+vhc5wCfTDltneh+DPANcBDwK7zZ7EEz\n27H5sKtaHGWOmf0E73NweghhVt7RJkcs5Z7NzLoAv8W/Nf5fM9erBq1d2zqfMk0fswLerWOpY6M3\n/K9ppvwTohxl3tzruCdwHJ58PNd0uFWv7OVtZgcBuwEnhBAaCow3Kcr53gKem7QHHgbG4at7nQWs\nD6TMbGBTwZZkhJyZLY93Fs/XVSGEb4jWnI3eGLPluy5ten++5+ga3U8IIfwsvdHMngDeAS7CO0tW\ntGoqczNrD9wEPBpCGJ13xBWomso9m5m1Bf4F9MX7i3zbzPWqQb5rW89s4vE0co7sMm3q2PTxtbCO\ndjnLfClRDfc90TFHhRAWNxlt9StreZvZCvhAghtDCP8tLNREKffrvCteAfevEML3M5KY2avAU3iz\nbaMjxEs1/cEK0YUDzTe3BeCfeK3ZHAAz65jjQ69TdD+nmfOl93fMsS/XOeZG9/9cIqgQPjSzF4Gt\nzWyZEMJcKls1lfnZQD98MEW1q6Zy/17UpPgP/G9wXghhZDPXqhZz8Fr3XDrhf4OmyrWQMm3q2PTx\nzf0Nk6CcZb4E85WP7sWbx38RQni+2WirX7nL+7LonOcUEGMSlbvc5+LJ3q2ZB4YQnjazj/F+6o0q\nSbNsCGFKCKFNCKFtdN/UrW0IYVL00Gn4B2SuKvheeOfCac1cPr2/sXPAktWin0T3n+U4/tMonuWb\nuWbsqqXMzawH3sfuNqCtmfU3s/4Zx3WPtlVFjUe1lHumKLG7GTgCqA8hXNLMdarJNPw11D7HvnzW\nti6kTL/B34CXOtbMOuIfBLmaYJKmnGX+vYzEbkfg2FA7E+SXrbzNbDBwNHAtsHLG+3V6jrUe0bYO\nhT6JKlTu13lzuckKTVyr4vrcpTsqbplj30+A90IIzX0Tbu4cAZ/2JC1dzdw7x/GrAQuB6c1cs5qV\nu8xXxb+5nAC8n3G7Pdp/TvT7rvkEX8XieK1nJnZHAX8IIVyYb8BVorVrWzdXpjPw1ydR36NxwOAc\nH26bRfe1sJZ22co869z34iMQh4QQbis87KpVzvJeHf8SeiFLvl+nvxBeE/2+fkHPoDqV+3WeHhTX\nWG7yRZNXK8YQ4WLdWHLurzYZ29NTNZyXdfxKwLrAclnbx0YFlT3310y8n1fmsWvjCdzLZAz/xuee\nWUyCJnethDLHRwTth0/mmHlLD/u+Jfq9Z9xlk6Ryj7Yb8Pfo/H+IuwxKVK7rR/+3d2VtT0+TcWjG\nth9FZbpMxrZ2+LfnKcCyGdvT7wc3Zp03Pc/dSVnbx1A789yVu8w74pNvL8Zr7GIvg6SWN9Cjkffr\na6JrXRr93i3ucklSuUfbd4zOe0/W9vRnxPVNxht3geUowD
P4Ydb+4/BZ+2cDb5M1az9QHx37i6zt\nW+AdFD/kh1n7J+IfeBvkuOZF0Xmei/5QF+Aj3WbQzOoKSbjFUeY5Yqijhua5i6Pc8RUUGoDXgMOA\nw7NuP4m7TIpUrldHz3MMPp9gehb5J7OOuzU6brus7ftHb7bj8OTtHHx6jWksvVpCe/yLYXqFimOB\nu6PzDou7LBJa5ndF53i0kddxs+831X4rZ3k3cv2jqLF57uIod2BEdJ4HouP/jPfL+wT4UZOxxl1Y\njRTgL4DX8f4sn+HNSN1zHHdBVFBH5tj3E3xiwFn4B91DwKAmrjkk45pfR28gA+IuiySXedZj66ix\n5K7c5Y6PsFoc3Rpy3P4Rd3kUqUzb4Inzu3ji+zGeeGUnzLdEZbFtjnPsgc9H9R3eLWMUsEYj11se\nr8mYGl1vPHBi3OWQ1DLHZ/hv7DW8GBgad3kkqbwbuf5R0XlrLbkr93tLW3z6k/T1PsMTx17NxWrR\nCUREREQkASptQIWIiIiItIKSOxEREZEEUXInIiIikiBK7kREREQSRMmdiIiISIIouRMRERFJECV3\nIiIiIgmi5E5EREQkQZTciYiIiCSIkjsRERGRBPl/6Bh85SD2jC0AAAAASUVORK5CYII=\n",
"text": [
"<matplotlib.figure.Figure at 0x2b72bbace490>"
]
}
],
"prompt_number": 349
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print 'Number of non-zero weights is %d'%len(np.where(np.array(model_log_lasso.weights) != 0)[0])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Number of non-zero weights is 13143\n"
]
}
],
"prompt_number": 344
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Show the 100 phrases at either end of the weight ranking"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"vocab_map = np.array(cv.vocab_map_rdd.collect())"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 367
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"inds = np.argsort(np.array(model_log_lasso.weights))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 368
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Side question: how many n-grams of each length are there in the final vocabulary?"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cv.vocab_rdd.map(lambda ngram: (len(ngram.split()),1)).reduceByKey(add).collect()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 369,
"text": [
"[(1, 26835),\n",
" (2, 195492),\n",
" (3, 56910),\n",
" (4, 21098),\n",
" (5, 10512),\n",
" (6, 5847),\n",
" (7, 3889),\n",
" (8, 2267)]"
]
}
],
"prompt_number": 369
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Top Republican phrases (the 100 lowest weights, in ascending order)"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"vocab_map[inds[0:100]]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 370,
"text": [
"array([[u'court', u'72451'],\n",
" [u'respect', u'243934'],\n",
" [u'did', u'90356'],\n",
" [u'state', u'270389'],\n",
" [u'doe', u'97356'],\n",
" [u'u s', u'294362'],\n",
" [u'law', u'164353'],\n",
" [u'florida', u'125010'],\n",
" [u'ani', u'13529'],\n",
" [u'cir', u'48815'],\n",
" [u'instruct', u'149696'],\n",
" [u'deni', u'86925'],\n",
" [u'oklahoma', u'196003'],\n",
" [u'befor', u'31507'],\n",
" [u'address', u'5500'],\n",
" [u'fail', u'118226'],\n",
" [u'sinc', u'265672'],\n",
" [u's ct', u'251699'],\n",
" [u'inde', u'146205'],\n",
" [u'prosecut', u'224830'],\n",
" [u'trial', u'291993'],\n",
" [u's s ct l ed', u'252828'],\n",
" [u'u s s ct l ed', u'295022'],\n",
" [u's s ct l', u'252827'],\n",
" [u'u s s ct l', u'295021'],\n",
" [u'f cir', u'115874'],\n",
" [u'fed', u'120600'],\n",
" [u's s', u'252762'],\n",
" [u'action', u'3683'],\n",
" [u'onli', u'196425'],\n",
" [u'evid', u'110866'],\n",
" [u'unit', u'297316'],\n",
" [u'properti', u'223939'],\n",
" [u'trial court', u'292117'],\n",
" [u'place', u'210834'],\n",
" [u'noth', u'191766'],\n",
" [u'matter', u'177057'],\n",
" [u'standard', u'269878'],\n",
" [u'state v', u'272778'],\n",
" [u'air', u'9721'],\n",
" [u'intent', u'151037'],\n",
" [u'fed c', u'120606'],\n",
" [u'fed c c', u'120607'],\n",
" [u'carrier', u'42693'],\n",
" [u'murder', u'185627'],\n",
" [u'examin', u'112155'],\n",
" [u's c', u'251224'],\n",
" [u'strike', u'276658'],\n",
" [u'expert', u'114602'],\n",
" [u'proper', u'223606'],\n",
" [u'mrs', u'185251'],\n",
" [u'mitig', u'182327'],\n",
" [u'ground', u'134318'],\n",
" [u'argument', u'20820'],\n",
" [u'unit state v', u'298432'],\n",
" [u'man', u'174602'],\n",
" [u'provid', u'225783'],\n",
" [u'just', u'159681'],\n",
" [u'affirm', u'7688'],\n",
" [u'determin', u'89181'],\n",
" [u'resid', u'243589'],\n",
" [u'ordin', u'199302'],\n",
" [u'relat', u'238401'],\n",
" [u'make', u'173905'],\n",
" [u'retain', u'245350'],\n",
" [u'time', u'287851'],\n",
" [u'state law', u'271786'],\n",
" [u'district', u'95060'],\n",
" [u'declar', u'82653'],\n",
" [u'georgia', u'130754'],\n",
" [u'u s c', u'294398'],\n",
" [u'contain', u'65678'],\n",
" [u'present', u'217872'],\n",
" [u'observ', u'193454'],\n",
" [u's ct l ed', u'251798'],\n",
" [u'ed', u'101323'],\n",
" [u's ct l', u'251797'],\n",
" [u'u s s', u'294968'],\n",
" [u'judgment', u'157361'],\n",
" [u'case', u'42954'],\n",
" [u'land', u'163379'],\n",
" [u'document', u'97208'],\n",
" [u'offic', u'194859'],\n",
" [u'cert', u'45550'],\n",
" [u'section', u'258139'],\n",
" [u'everi', u'110744'],\n",
" [u'et', u'110404'],\n",
" [u'share', u'263804'],\n",
" [u'order', u'198323'],\n",
" [u'life', u'168419'],\n",
" [u'substanti', u'278327'],\n",
" [u'colleg', u'55297'],\n",
" [u'claim', u'51100'],\n",
" [u'parker', u'202893'],\n",
" [u'anoth', u'15361'],\n",
" [u'parti', u'203071'],\n",
" [u'specif', u'268503'],\n",
" [u'assum', u'23688'],\n",
" [u'main', u'173426'],\n",
" [u'wit', u'319049']], \n",
" dtype='<U67')"
]
}
],
"prompt_number": 370
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Top Democratic phrases (the 100 highest weights, in ascending order, so the strongest is last)"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"vocab_map[inds[-100:]]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 371,
"text": [
"array([[u'dori', u'98012'],\n",
" [u'explain', u'114736'],\n",
" [u'regulatori', u'237958'],\n",
" [u'estat', u'110004'],\n",
" [u'prejudic', u'217285'],\n",
" [u'opportun', u'197938'],\n",
" [u'lentz', u'166792'],\n",
" [u'pitt', u'210778'],\n",
" [u'jersey', u'155760'],\n",
" [u'assign', u'23074'],\n",
" [u'forego', u'125929'],\n",
" [u'common law', u'57596'],\n",
" [u'fcc', u'120329'],\n",
" [u'owner', u'200975'],\n",
" [u'clemmon', u'53699'],\n",
" [u'plaintiff error', u'211611'],\n",
" [u'restitut', u'244688'],\n",
" [u'revis', u'246876'],\n",
" [u'drug', u'98932'],\n",
" [u'texaco', u'286638'],\n",
" [u'supervisor', u'280556'],\n",
" [u'citat', u'49890'],\n",
" [u'henri', u'138636'],\n",
" [u'testimoni', u'285984'],\n",
" [u'offens', u'194312'],\n",
" [u'proof', u'223448'],\n",
" [u'offici', u'195467'],\n",
" [u'assess', u'22773'],\n",
" [u'coal', u'54146'],\n",
" [u'instant', u'149552'],\n",
" [u'explos', u'114854'],\n",
" [u'd c', u'79005'],\n",
" [u'voluntari', u'314520'],\n",
" [u'option', u'198171'],\n",
" [u'fisher', u'124508'],\n",
" [u'tribe', u'292731'],\n",
" [u'petit', u'209065'],\n",
" [u'serv', u'261564'],\n",
" [u'histori', u'139496'],\n",
" [u'mail', u'173259'],\n",
" [u'request', u'242004'],\n",
" [u'tax court', u'283310'],\n",
" [u'appear', u'16899'],\n",
" [u'complain', u'59456'],\n",
" [u'circuit court', u'49543'],\n",
" [u'yard', u'321536'],\n",
" [u'allow', u'11414'],\n",
" [u'alreadi', u'11795'],\n",
" [u'becaus', u'30618'],\n",
" [u'convers', u'68215'],\n",
" [u'howev', u'141282'],\n",
" [u'cabl', u'40506'],\n",
" [u'polic', u'213618'],\n",
" [u'thoma', u'287326'],\n",
" [u'proposit', u'224785'],\n",
" [u'challeng', u'46211'],\n",
" [u'delay', u'86098'],\n",
" [u'procedur', u'221135'],\n",
" [u'said', u'253822'],\n",
" [u'protect', u'225180'],\n",
" [u'fireston', u'124366'],\n",
" [u'appoint', u'19353'],\n",
" [u'avail', u'25770'],\n",
" [u'extradit', u'115557'],\n",
" [u'state court', u'270911'],\n",
" [u'initi', u'148491'],\n",
" [u'remand', u'239922'],\n",
" [u'error', u'109045'],\n",
" [u'treatment', u'291803'],\n",
" [u'believ', u'32225'],\n",
" [u'alleg', u'10577'],\n",
" [u'exchang', u'112663'],\n",
" [u'mention', u'179794'],\n",
" [u'regard', u'237099'],\n",
" [u'rais', u'231787'],\n",
" [u'twa', u'294041'],\n",
" [u'import', u'144102'],\n",
" [u'revers', u'246249'],\n",
" [u'circumst', u'49692'],\n",
" [u'moreov', u'183549'],\n",
" [u'wilson', u'318710'],\n",
" [u'accord', u'1476'],\n",
" [u'homosexu', u'140446'],\n",
" [u'test', u'285578'],\n",
" [u'interpret', u'152138'],\n",
" [u'f c c', u'115778'],\n",
" [u'epa', u'108035'],\n",
" [u'cite', u'49935'],\n",
" [u'magistr', u'173121'],\n",
" [u'therefor', u'286876'],\n",
" [u'circuit', u'49515'],\n",
" [u'content', u'66155'],\n",
" [u'hill', u'139242'],\n",
" [u'abov', u'369'],\n",
" [u'bpa', u'36162'],\n",
" [u'f c', u'115777'],\n",
" [u'major', u'173714'],\n",
" [u'sec', u'257351'],\n",
" [u'provis', u'226510'],\n",
" [u'refer', u'236361']], \n",
" dtype='<U67')"
]
}
],
"prompt_number": 371
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment