This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# | |
# MongoDB collection deduplication utility. | |
# | |
# Works only with pymongo >= 3.0 | |
# Kireal | |
# | |
# 01.10.2016 | |
# Last change: 04.10.2016 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read in the log | |
matches = pd.read_csv('../data/all_but_champ/match_log.csv') | |
#Add a column for match length | |
matches["length"] = matches["player1-score"] + matches["player2-score"] | |
# Get all the records where Trav won | |
travis_winner = matches[matches["winner"] == "Travis Roberts"] | |
# Get all the records where Trav lost | |
travis_loser = matches[matches["loser"] == "Travis Roberts"] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A simple cheat sheet of Spark Dataframe syntax | |
# Current for Spark 1.6.1 | |
# import statements | |
from pyspark.sql import SQLContext | |
from pyspark.sql.types import * | |
from pyspark.sql.functions import * | |
#creating dataframes | |
df = sqlContext.createDataFrame([(1, 4), (2, 5), (3, 6)], ["A", "B"]) # from manual data |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## FUC (Frequently Used Commands) | |
df.isnull().sum() # number of missing values per column (isnull must be called before .sum()) | |
## USEFUL FUNCTIONS | |
# Binning | |
pd.cut | |
pd.qcut # quantile-based pd.cut | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#### BASIC ######################################################################################################################## | |
# cleaning str in the header | |
df.columns = [x.lower().strip() for x in df.columns] # lower case, trim leading and trailing spaces | |
df.columns = [x.strip().replace(' ', '_') for x in df.columns] # replace whitespaces b/w words with _ | |
# checking NaN in all df | |
df.isnull().values.any() | |
# get column-slices |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys | |
import pandas as pd | |
import pymongo | |
import json | |
def import_content(filepath): | |
mng_client = pymongo.MongoClient('localhost', 27017) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys | |
import pandas as pd | |
import pymongo | |
import json | |
def import_content(filepath): | |
mng_client = pymongo.MongoClient('localhost', 27017) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import py2neo | |
import datetime | |
#where we write it | |
f_name = 'DBREPORT_%s.txt' % datetime.datetime.today().strftime('%Y-%m-%d') | |
#overwrite anything previous | |
with open(f_name,'wb') as f: | |
f.write('REPORT COMPILATION STARTED AT %s' % datetime.datetime.now()) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import StringIO | |
from flask import Flask, Response | |
@app.route('/some_dataframe.csv') | |
def output_dataframe_csv(): | |
output = StringIO.StringIO() | |
some_dataframe.to_csv(output) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pandas.core.api import DataFrame | |
from pandas.tseries.tools import to_datetime | |
#save me at site-packages\pandas\io\cypher.py | |
def read_cypher(cypher, con, index_col=None, params = {},parse_dates = None, columns= None): | |
''' | |
Run a Cypher query against the graph at con, put the results into a df | |
Parameters |