Skip to content

Instantly share code, notes, and snippets.

@mieitza
mieitza / deduplication_mongod.py
Created January 3, 2018 15:09 — forked from kireal/deduplication_mongod.py
Run to deduplicate mongodb collection
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Deduplication collection mongodb database utility.
#
# Works only with pymongo >= 3.0
# Kireal
#
# 01.10.2016
# Last change: 04.10.2016
#Read in the log
matches = pd.read_csv('../data/all_but_champ/match_log.csv')
#Add a column for match length
matches["length"] = matches["player1-score"] + matches["player2-score"]
# Get all the records where Trav won
travis_winner = matches[matches["winner"] == "Travis Roberts"]
# Get all the records where Trav lost
travis_loser = matches[matches["loser"] == "Travis Roberts"]
@mieitza
mieitza / Spark Dataframe Cheat Sheet.py
Created January 3, 2018 15:08 — forked from ved93/Spark Dataframe Cheat Sheet.py
Cheat sheet for Spark Dataframes (using Python)
# A simple cheat sheet of Spark Dataframe syntax
# Current for Spark 1.6.1
# import statements
from pyspark.sql import SQLContext
from pyspark.sql.types import *
from pyspark.sql.functions import *
#creating dataframes
df = sqlContext.createDataFrame([(1, 4), (2, 5), (3, 6)], ["A", "B"]) # from manual data
## FUC (Frequently Used Commands)
# isnull is a method and must be called to get the boolean mask before summing
df.isnull().sum() # num of missing values per column
## USEFUL FUNCTIONS
# Binning
pd.cut
pd.qcut # quantile-based pd.cut
@mieitza
mieitza / pandas-cheatsheet.py
Created January 3, 2018 15:08 — forked from spepechen/pandas-cheatsheet.py
handy 🐼 snippets
#### BASIC ########################################################################################################################
# cleaning str in the header
df.columns = [x.lower().strip() for x in df.columns] # lower case, trim leading and trailing spaces
df.columns = [x.strip().replace(' ', '_') for x in df.columns] # replace whitespaces b/w words with _
# checking NaN in all df
df.isnull().values.any()
# get column-slices
@mieitza
mieitza / import_csv_to_mongo
Created January 3, 2018 15:07 — forked from thangarajan8/import_csv_to_mongo
Store CSV data into mongodb using python pandas
#!/usr/bin/env python
import sys
import pandas as pd
import pymongo
import json
def import_content(filepath):
mng_client = pymongo.MongoClient('localhost', 27017)
@mieitza
mieitza / import_csv_to_mongo
Created January 3, 2018 15:07 — forked from alpoza/import_csv_to_mongo
Store CSV data into mongodb using python pandas
#!/usr/bin/env python
import sys
import pandas as pd
import pymongo
import json
def import_content(filepath):
mng_client = pymongo.MongoClient('localhost', 27017)
@mieitza
mieitza / database_report.py
Created January 3, 2018 15:06 — forked from gregroberts/database_report.py
Py2neo Write a report on the contents of a graph database, describe what nodes are in, what sort of properties they (seem to) have, and what edges come in and out of each
import py2neo
import datetime
#where we write it
f_name = 'DBREPORT_%s.txt' % datetime.datetime.today().strftime('%Y-%m-%d')
#overwrite anything previous
with open(f_name,'wb') as f:
f.write('REPORT COMPILATION STARTED AT %s' % datetime.datetime.now())
@mieitza
mieitza / gist:4341f290e0dba9c6c53a5596779f4e94
Created January 3, 2018 15:06 — forked from c0ldlimit/gist:5164171
#python #flask #pandas Using flask to return a csv response from a dataframe
import StringIO
from flask import Flask, Response
@app.route('/some_dataframe.csv')
def output_dataframe_csv():
output = StringIO.StringIO()
some_dataframe.to_csv(output)
@mieitza
mieitza / cypher.py
Created January 3, 2018 15:06 — forked from gregroberts/cypher.py
A function for pandas to get results of a cypher query directly into a DataFrame
from pandas.core.api import DataFrame
from pandas.tseries.tools import to_datetime
#save me at site-packages\pandas\io\cypher.py
def read_cypher(cypher, con, index_col=None, params = {},parse_dates = None, columns= None):
'''
Run a Cypher query against the graph at con, put the results into a df
Parameters