This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## FUC (Frequently Used Commands) | |
df.isnull().sum() # num of missing values per column (isnull is a method and must be called) | |
## USEFUL FUNCTIONS | |
# Binning | |
pd.cut | |
pd.qcut # quantile-based pd.cut | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A simple cheat sheet of Spark Dataframe syntax | |
# Current for Spark 1.6.1 | |
# import statements | |
from pyspark.sql import SQLContext | |
from pyspark.sql.types import * | |
from pyspark.sql.functions import * | |
#creating dataframes | |
df = sqlContext.createDataFrame([(1, 4), (2, 5), (3, 6)], ["A", "B"]) # from manual data |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read in the log | |
matches = pd.read_csv('../data/all_but_champ/match_log.csv') | |
#Add a column for match length | |
matches["length"] = matches["player1-score"] + matches["player2-score"] | |
# Get all the records where Trav won | |
travis_winner = matches[matches["winner"] == "Travis Roberts"] | |
# Get all the records where Trav lost | |
travis_loser = matches[matches["loser"] == "Travis Roberts"] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# | |
# Utility for deduplicating a MongoDB collection. | |
# | |
# Works only with pymongo >= 3.0 | |
# Kireal | |
# | |
# 01.10.2016 | |
# Last change: 04.10.2016 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# @Author: xiewenqian <int> | |
# @Date: 2016-11-28T20:35:09+08:00 | |
# @Email: [email protected] | |
# @Last modified by: int | |
# @Last modified time: 2016-12-01T19:32:48+08:00 | |
import pandas as pd | |
from pymongo import MongoClient |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def reaching_max_messages(current_hour_bitrange): | |
""" | |
from https://stackoverflow.com/a/29281409 | |
""" | |
# https://stackoverflow.com/a/29281409 | |
# https://docs.mongodb.com/manual/reference/operator/query/bitsAllSet/ | |
pipeline = [ | |
{ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import pandas as pd | |
from pymongo import MongoClient | |
def mongo_to_pandas(client, database, collection): | |
""" Reads mongo collection to pandas data frame.""" | |
cur = client[database][collection].find() | |
df = pd.DataFrame(list(cur)) | |
df["_id"] = df["_id"].apply(lambda x: str(x)) #convert ids to strings |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import re, os, time, csv, random, pymongo, math, html, string, html2text | |
from pymongo import MongoClient | |
import xml.etree.ElementTree as ET | |
import pymongo | |
import sklearn | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from scipy.cluster.hierarchy import fclusterdata | |
from pymongo import MongoClient | |
import numpy as np | |
from datetime import * | |
import json | |
def saveToRelational(jsonPacket): | |
""" | |
save the received json packet to a relational database |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from pandas.io.json import json_normalize | |
from pymongo import MongoClient | |
import matplotlib.pyplot as plt | |
import re | |
import time | |
pd.set_option('display.expand_frame_repr', False) | |
def _connect_mongo(host, port, db): |