Install the AWS CLI and the AWS SDK for Python (boto3), if you haven't already:
pip install awscli
pip install boto3
Next, run `aws configure` on your laptop or server to add your AWS credentials.
For details, see
| $ ./bin/spark-shell | |
| 14/04/18 15:23:49 INFO spark.HttpServer: Starting HTTP Server | |
| 14/04/18 15:23:49 INFO server.Server: jetty-7.x.y-SNAPSHOT | |
| 14/04/18 15:23:49 INFO server.AbstractConnector: Started SocketConnector@0.0.0.0:49861 | |
| Welcome to | |
| ____ __ | |
| / __/__ ___ _____/ /__ | |
| _\ \/ _ \/ _ `/ __/ '_/ | |
| /___/ .__/\_,_/_/ /_/\_\ version 0.9.1 | |
| /_/ |
| 2014-03-04 | 15dfb8e6cc4111e3a5bb600308919594 | 11 | |
|---|---|---|---|
| 2014-03-06 | 81da510acc4111e387f3600308919594 | 61 |
| // load error messages from a log into memory | |
| // then interactively search for various patterns | |
| // base RDD | |
| val lines = sc.textFile("log.txt") | |
| // transformed RDDs | |
| val errors = lines.filter(_.startsWith("ERROR")) | |
| val messages = errors.map(_.split("\t")).map(r => r(1)) | |
| messages.cache() |
| vagrant up | |
| vagrant ssh | |
| cd jem | |
| nbserver |
| # using part files 1-3 to construct "minitweets" | |
| cat rawtweets/part-0000[1-3] > minitweets | |
| # change log4j properties to WARN to reduce noise during demo | |
| mv conf/log4j.properties.template conf/log4j.properties | |
| vim conf/log4j.properties # Change to WARN | |
| # launch Spark shell REPL | |
| ./bin/spark-shell |
| import nltk | |
| nltk.download() | |
| ## use nltk.download() within a Python prompt to | |
| ## download the `punkt` data | |
| ## Anaconda is recommended, to pick up NumPy, NLTK, etc. | |
| ## http://continuum.io/downloads | |
| ## this also requires TextBlob/PerceptronTagger |
| import org.apache.spark.graphx._ | |
| import org.apache.spark.rdd.RDD | |
| case class Peep(name: String, age: Int) | |
| val vertexArray = Array( | |
| (1L, Peep("Kim", 23)), | |
| (2L, Peep("Pat", 31)), | |
| (3L, Peep("Chris", 52)), | |
| (4L, Peep("Kelly", 39)), |
| <html> | |
| <head><title>test pdf</title></head> | |
| <div id="pdf" | |
| style="width:900px; height:500px" | |
| ></div> | |
| <script src="https://pdfobject.com/js/pdfobject.min.js"></script> | |
| <script> | |
| var options = { | |
| pdfOpenParams: { | |
| page: 1, |
| #!/usr/bin/env python | |
| # encoding: utf-8 | |
| import codecs | |
| import csv | |
| import json | |
| import unicodedata | |
| filename = "SurveyofDoctorateRecipients_linkages.csv" | |
| #filename = "SurveyofEarnedDoctorates_linkages.csv" |
Install the AWS CLI and the AWS SDK for Python (boto3), if you haven't already:
pip install awscli
pip install boto3
Next, run `aws configure` on your laptop or server to add your AWS credentials.
For details, see