Jaya Zenchenko sigma23

What

Roll your own IPython Notebook server with Amazon Web Services (EC2) using their Free Tier.

An active AWS account. First-time sign-ups are eligible for the free tier for a year.
One Micro Tier EC2 Instance
With AWS we will use the stock Ubuntu Server AMI and customize it.
Anaconda for Python.
Coffee/Beer/Time

	# preliminaries
	from pymongo import MongoClient
	from nltk.corpus import stopwords
	from string import ascii_lowercase
	import pandas as pd
	import gensim, os, re, pymongo, itertools, nltk, snowballstemmer

	# set the location where we'll save our model
	savefolder = '/data'

	"""
	Minimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy)
	BSD License
	"""
	import numpy as np

	# data I/O
	data = open('input.txt', 'r').read() # should be simple plain text file
	chars = list(set(data))
	data_size, vocab_size = len(data), len(chars)

	"""
	MAUCpy
	~~~~~~

	Contains two equations from Hand and Till's 2001 paper on a multi-class
	approach to the AUC. The a_value() function is the probabilistic approximation
	of the AUC found in equation 3, while MAUC() is the pairwise averaging of this
	value for each of the classes. This is equation 7 in their paper.
	"""

	/*
	This example uses Scala. Please see the MLlib documentation for a Java example.

	Try running this code in the Spark shell. It may produce different topics each time (since LDA includes some randomization), but it should give topics similar to those listed above.

	This example is paired with a blog post on LDA in Spark: http://databricks.com/blog
	Spark: http://spark.apache.org/
	*/

	import scala.collection.mutable

	import json
	import urlparse
	from itertools import chain
	flatten = chain.from_iterable

	from nltk import word_tokenize

	from gensim.corpora import Dictionary
	from gensim.models.ldamodel import LdaModel
	from gensim.models.tfidfmodel import TfidfModel

	# sql.export.rf(): save a randomForest model as SQL
	# v0.04
	# Copyright (c) 2013-2014 Shane Butler <shane dot butler at gmail dot com>
	#
	# sql.export.rf is free software: you can redistribute it and/or modify it
	# under the terms of the GNU General Public License as published by
	# the Free Software Foundation, either version 2 of the License, or
	# (at your option) any later version.
	#
	# sql.export.rf is distributed in the hope that it will be useful, but

	def linear_model_ensemble(X, y, X_test, fold_num, fold_num_sec, grid_search_range, oobe=True, x_val=True ):

	'''
	X - Train set

	y - Train set labels with. Labels are 1 for pos instances and -1 for neg instances

	fold_num1 - Fold size for the first step X-validation to set the hyper-params
	and feature selectors

	# sql.export.gbm(): save a GBM model as SQL
	# v0.11
	# Copyright (c) 2013-2014 Shane Butler <shane dot butler at gmail dot com>
	#
	# sql.export.gbm is free software: you can redistribute it and/or modify it
	# under the terms of the GNU General Public License as published by
	# the Free Software Foundation, either version 2 of the License, or
	# (at your option) any later version.
	#
	# sql.export.gbm is distributed in the hope that it will be useful, but

	# Delete the possibly existing autocomplete test index
	curl -X DELETE localhost:9200/autocomplete_test

	# Put the config of the autocomplete index
	curl -X PUT localhost:9200/autocomplete_test -d '
	{
	"settings" : {
	"index" : {
	"analysis" : {
	"analyzer" : {