Mageswaran Mageswaran1989

Using Git

	/**
	* Get the Stack Exchange data dump from https://archive.org/details/stackexchange
	* Data set used here: math.stackexchange.com
	**/

	// Open the file. `textFile` returns an RDD (Resilient Distributed Dataset)
	// of Strings, one element per line of the file.
	// NOTE(review): `sc` is presumably the SparkContext provided by spark-shell,
	// and "Posts.xml" is resolved relative to the default file system — confirm.
	val postXML = sc.textFile("Posts.xml")

	//Count the lines. Note: Run twice and see the difference ;)

	package org.aja.tej.tej.test.spark

	/**
	* Created by mageswaran on 9/8/15.
	*/

	import java.util.Random

	import org.apache.spark.{SparkConf, SparkContext}

	package org.aja.tej.examples


	import java.io.File

	import org.aja.tej.utils.TejUtils
	import org.apache.spark.{SparkConf, SparkContext}


	/**

	package org.aja.tej.examples.ml

	import org.aja.tej.utils.TejUtils
	import org.apache.spark.ml.classification.MultilayerPerceptronClassifier
	import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
	import org.apache.spark.mllib.util.MLUtils
	import org.apache.spark.sql.SQLContext

	/**
	* Created by mageswaran on 25/9/15.

	// For the latest version, see: https://github.com/Mageswaran1989/aja/blob/master/src/examples/scala/org/aja/tej/examples/streaming/twitter/TwitterWithNeo4j.scala
	package org.aja.tej.examples.streaming.twitter

	import com.google.gson.Gson
	import org.aja.tej.utils.{TejUtils, TejTwitterUtils}
	import org.anormcypher.{Cypher, Neo4jREST}
	import org.apache.spark.sql.{AnalysisException, Row, SQLContext}
	import org.apache.spark.streaming.twitter.TwitterUtils
	import org.apache.spark.streaming.{Seconds, StreamingContext}
	import play.api.libs.ws.ning

	import os
	import boto3
	from collections import defaultdict
	import botocore

	def get_matching_s3_objects(bucket,
	aws_access_key_id,
	aws_secret_access_key,
	region_name,
	prefix='',