Skip to content

Instantly share code, notes, and snippets.

View ipeirotis's full-sized avatar

Panos Ipeirotis ipeirotis

View GitHub Profile
import json
import os
from getpass import getpass
from pathlib import Path
from typing import Dict, List, Optional, Union
from IPython.display import display, HTML, JSON
try:
SELECT
P1.*
, COALESCE(P1.count-P2.count,P1.count) AS count_without_missing
, P2.count AS count_missing
FROM
itemsets P1
JOIN itemsets P2 ON (P1.email = P2.email)
WHERE
P2.itemset_size>=2 AND
P1.itemset_size = P2.itemset_size - 1 AND
# We use the "CUBE" operator, to calculate the frequency of any
# attribute-value combination.
#
# Notice that we replace the NULL values with "N/A" before the CUBE operator.
# This is to avoid confusion with the way that CUBE uses NULL values to indicate
# "any value" for attribute combinations that do not use the available attributes
# of the cube.
df = (
dataset
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@ipeirotis
ipeirotis / baseball_salaries_violin_plots.py
Last active June 30, 2018 03:49
Plot the distribution of salaries in baseball leagues over time
# Long version with full comments
# We want to plot the distribution of salaries in baseball leagues over time
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Download a dataset from Lahman's database of baseball statistics
# Panos Ipeirotis, Oct 30 2016
# Just trying to keep my 5yo busy while it is pouring rain outside...
import random
import emoji # https://pypi.python.org/pypi/emoji/
# Selection from http://www.webpagefx.com/tools/emoji-cheat-sheet/
emojis = [':apple:', ':green_apple:', ':tangerine:', ':lemon:', ':cherries:', ':grapes:', ':watermelon:', ':strawberry:',
':peach:', ':melon:', ':banana:', ':pear:', ':pineapple:', ':sweet_potato:', ':eggplant:', ':tomato:', ':corn:']
@ipeirotis
ipeirotis / MTurk_Cohort_Analysis.tsv
Created February 29, 2016 15:30
Data for Mechanical Turk Cohort Analysis
firstSeen lastSeen cnt
2014-05 2014-05 882
2014-05 2014-06 255
2014-05 2014-07 108
2014-05 2014-08 93
2014-05 2014-09 68
2014-05 2014-10 44
2014-05 2014-11 59
2014-05 2014-12 35
2014-05 2015-01 33
### Cohort Analysis
import matplotlib.pyplot as plt
# Connect to the BigQuery API
from googleapiclient.discovery import build
from oauth2client import client
credentials = client._get_application_default_credential_from_file('client_secrets.json')
credentials = credentials.create_scoped('https://www.googleapis.com/auth/bigquery')
@ipeirotis
ipeirotis / WikiSynonyms-PythonProgrammingLanguage.json
Created February 26, 2013 03:23
Response of the WikiSynonyms service for the term 'Python (programming language)'. [http://wikisynonyms.ipeirotis.com/api/Python%20(programming%20language)]
{
"http": 200,
"message": "success",
"terms": [
{
"term": "Python (programming language)",
"canonical": 1,
"oskill": 1
},
{
@ipeirotis
ipeirotis / WikiSynonyms-Python.json
Created February 26, 2013 03:19
Response of the WikiSynonyms service for the term 'Python'. Since this is a disambiguation page, the service returns the different 'senses' of the word and you need to pick the one that you want.
{
"http": 300,
"message": "The entry is a disambiguation page in Wikipedia. Please query again with one of the returned terms",
"terms": [
"Armstrong Siddeley Python",
"CMU Common Lisp",
"CPython",
"Colt Python",
"Computer",
"Monty Python",