Skip to content

Instantly share code, notes, and snippets.

View FavioVazquez's full-sized avatar
💻
Changing the world :)

Favio André Vázquez FavioVazquez

💻
Changing the world :)
View GitHub Profile
@FavioVazquez
FavioVazquez / .gitignore
Last active August 29, 2015 14:17 — forked from btbytes/.gitignore
Great gist to make a PDF of your slidify presentations. This is a fork of a fork from Ramnath Vaidyanathan. It is important that you copy the makefile exactly as it is here. If that doesn't work, replace the leading spaces with tabs.
*.md
*.html
*.pdf
libraries
@FavioVazquez
FavioVazquez / Optimus.ipynb
Created August 7, 2017 01:24
Optimus Example
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# Import optimus
import optimus as op

# Instantiate DataFrameTransformer.
# NOTE: `df` is not created in this snippet; it must already exist.
transformer = op.DataFrameTransformer(df)

# Show the original DataFrame.
transformer.show()

# Chain the transformations on every column ("*").
# The surrounding parentheses are required: without them a line that starts
# with `.` is a syntax error, because Python only joins lines implicitly
# inside brackets.
(
    transformer.trim_col("*")
    .remove_special_chars("*")
    .clear_accents("*")
)
# Import optimus
import optimus as op
# Choose a column for analyzing
detector = op.OutlierDetector(df,"num")
# With the outliers() method you can use MAD to detect if there is an outlier in your column
detector.outliers()
# And with the run() method you can see which values are not outliers
detector.run()
# Finally with the delete_outliers() method you can delete existing outliers in your column.
# This will modify the dataframe we have used when instantiating the OutlierDetector
# Import optimus
import optimus as op
transformer = op.DataFrameTransformer(df)
# Choose the columns to run the analysis and the names of the columns for the output
transformer.impute_missing(["a","b"],["out_a","out_B"],strategy="mean").show()
# Run the same imputation on the same columns, this time with the "median" strategy
transformer.impute_missing(["a","b"],["out_a","out_B"],strategy="median").show()
# Import optimus
import optimus as op
# Instance of Utilities class
tools = op.Utilities()
# Reading df from web
url = "https://raw.githubusercontent.com/ironmussa/Optimus-examples/master/examples/foo.csv"
df = tools.read_dataset_url(path=url)
# Import optimus
import optimus as op
# Instance of Utilities class
tools = op.Utilities()
# Reading DF from web
url = "https://raw.githubusercontent.com/ironmussa/Optimus-examples/master/examples/foo.csv"
df = tools.read_dataset_url(path=url)
# Instance of transformer class
transformer = op.DataFrameTransformer(df)
import findspark
findspark.init("/opt/spark")
import random
from pyspark import SparkContext
sc = SparkContext(appName="EstimatePi")
def inside(p):
    """Return True if a random point of the unit square lies in the unit circle.

    A point (x, y) is drawn with both coordinates from ``random.random()``
    and tested against ``x*x + y*y < 1``.

    Args:
        p: Ignored. The parameter exists only so the function can be called
            with one argument per sample.

    Returns:
        bool: True when the sampled point satisfies ``x*x + y*y < 1``.
    """
    x, y = random.random(), random.random()
    return x*x + y*y < 1
NUM_SAMPLES = 1000000
count = sc.parallelize(range(0, NUM_SAMPLES)) \
!curl -O http://download.tensorflow.org/example_images/flower_photos.tgz
!tar xzf flower_photos.tgz
!mkdir flower_photos/sample