Tested with Apache Spark 2.1.0, Python 2.7.13 and Java 1.8.0_112
For older versions of Spark and IPython, please see the previous version of this text.
| from pyspark import SparkContext | |
| sc = SparkContext(appName = "simple app") | |
| sc._jsc.hadoopConfiguration().set("fs.s3n.awsAccessKeyId", "yourAccessKeyId") | |
| sc._jsc.hadoopConfiguration().set("fs.s3n.awsSecretAccessKey", "yourSecretAccessKey") | |
| text_file = sc.textFile("s3n://bucketName/filename.tar.gz") | |
| counts = text_file.flatMap(lambda line: line.split(" ")) \ |
Tested with Apache Spark 2.1.0, Python 2.7.13 and Java 1.8.0_112
For older versions of Spark and IPython, please see the previous version of this text.
| import selenium | |
| import time | |
| from selenium import webdriver | |
| browser = webdriver.PhantomJS("phantomjs") | |
| browser.get("https://twitter.com/StackStatus") | |
| print browser.title | |
| pause = 3 |
Sublime Text includes a command line tool, subl, to work with files on the command line. This can be used to open files and projects in Sublime Text, as well as working as an EDITOR for Unix tools, such as Git and Subversion.
Applications folder
Setup
A personal diary of DataFrame munging over the years.
Convert Series datatype to numeric (will error if column has non-numeric values)
(h/t @makmanalp)
| /* Flatten das bootstrap */ | |
| .well, .navbar-inner, .popover, .btn, .tooltip, input, select, textarea, pre, .progress, .modal, .add-on, .alert, .table-bordered, .nav>.active>a, .dropdown-menu, .tooltip-inner, .badge, .label, .img-polaroid { | |
| -moz-box-shadow: none !important; | |
| -webkit-box-shadow: none !important; | |
| box-shadow: none !important; | |
| -webkit-border-radius: 0px !important; | |
| -moz-border-radius: 0px !important; | |
| border-radius: 0px !important; | |
| border-collapse: collapse !important; | |
| background-image: none !important; |
| import cv2.cv as cv | |
| import tesseract | |
| gray = cv.LoadImage('captcha.jpeg', cv.CV_LOAD_IMAGE_GRAYSCALE) | |
| cv.Threshold(gray, gray, 231, 255, cv.CV_THRESH_BINARY) | |
| api = tesseract.TessBaseAPI() | |
| api.Init(".","eng",tesseract.OEM_DEFAULT) | |
| api.SetVariable("tessedit_char_whitelist", "0123456789abcdefghijklmnopqrstuvwxyz") | |
| api.SetPageSegMode(tesseract.PSM_SINGLE_WORD) | |
| tesseract.SetCvImage(gray,api) | |
| print api.GetUTF8Text() |
| #!/usr/bin/env python | |
| class Node( object ): | |
| def __init__( self, end_node = False ): | |
| self.end_node = end_node | |
| self.prefix_count = 0 | |
| self.children = {} | |
| # GNU Screen - main configuration file | |
| # All other .screenrc files will source this file to inherit settings. | |
| # Author: Christian Wills - [email protected] | |
| # Allow bold colors - necessary for some reason | |
| attrcolor b ".I" | |
| # Tell screen how to set colors. AB = background, AF=foreground | |
| termcapinfo xterm 'Co#256:AB=\E[48;5;%dm:AF=\E[38;5;%dm' |