Skip to content

Instantly share code, notes, and snippets.

View ipeirotis's full-sized avatar

Panos Ipeirotis ipeirotis

View GitHub Profile
# Panos Ipeirotis, Oct 30 2016
# Just trying to keep my 5yo busy while it is pouring rain outside...
import random
import emoji # https://pypi.python.org/pypi/emoji/
# Selection from http://www.webpagefx.com/tools/emoji-cheat-sheet/
# Emoji aliases (fruit and vegetables) in the ':name:' form; order is preserved
# from the original selection.
emojis = [
    ':apple:',
    ':green_apple:',
    ':tangerine:',
    ':lemon:',
    ':cherries:',
    ':grapes:',
    ':watermelon:',
    ':strawberry:',
    ':peach:',
    ':melon:',
    ':banana:',
    ':pear:',
    ':pineapple:',
    ':sweet_potato:',
    ':eggplant:',
    ':tomato:',
    ':corn:',
]
@ipeirotis
ipeirotis / baseball_salaries_violin_plots.py
Last active June 30, 2018 03:49
Plot the distribution of salaries in baseball leagues over time
# Long version with full comments
# We want to plot the distribution of salaries in baseball leagues over time
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Download a dataset from Lahman's database of baseball statistics
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# We use the "CUBE" operator to calculate the frequency of every
# attribute-value combination.
#
# Note that we replace the NULL values with "N/A" before applying the CUBE
# operator. CUBE itself uses NULL to indicate "any value" for the attributes
# that a given combination does not use, so NULLs already present in the data
# would otherwise be confused with the ones that CUBE generates.
df = (
dataset
SELECT
P1.*
, COALESCE(P1.count-P2.count,P1.count) AS count_without_missing
, P2.count AS count_missing
FROM
itemsets P1
JOIN itemsets P2 ON (P1.email = P2.email)
WHERE
P2.itemset_size>=2 AND
P1.itemset_size = P2.itemset_size - 1 AND
import json
import os
from getpass import getpass
from pathlib import Path
from typing import Dict, List, Optional, Union
from IPython.display import display, HTML, JSON
try: