I hereby claim:
- I am enkeboll on github.
- I am enkeboll (https://keybase.io/enkeboll) on keybase.
- I have a public key ASAFYSl9y480O-OPtLFBOEA-F5508UFKGk6G3p57BQGk7Qo
To claim this, I am signing this object:
// bookmarklet: prompts for the treasury password, "types" each character through the
// page's PasswordVK handler (presumably its virtual-keyboard function), then submits the form
javascript: (() => { let pw = prompt("Treasury password:"); for (const char of pw.toLowerCase()) { PasswordVK(char); } document.querySelector('input.action[value="Submit"]').click(); })();
# -*- coding: utf-8 -*-
"""Automation demo - Andy.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1ehWn8-jcItWIFNFhKWUdZBOF0DAdMvll
"""

"""## Automating the process

First, two important things to do to follow along with the lecture:
"""
# This application computes the top 25 pages on Wikipedia for each of the Wikipedia sub-domains
import argparse
import csv
import datetime
import gzip
import os
import sys
from collections import defaultdict, namedtuple
from heapq import heappush, heappushpop
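
The gist's implementation is truncated after the imports; below is a minimal sketch of the heap-based top-25 selection those imports suggest. The pageviews line format ("domain page views bytes"), the `PageCount` name, and the `top_pages` function are assumptions for illustration, not taken from the gist.

# minimal sketch, assuming each line of a pageviews dump reads
# "<domain> <page> <views> <bytes>"; names here are illustrative
PageCount = namedtuple('PageCount', ['views', 'page'])

def top_pages(lines, n=25):
    heaps = defaultdict(list)  # domain -> bounded min-heap of PageCount entries
    for line in lines:
        domain, page, views, _ = line.split(' ')
        entry = PageCount(int(views), page)
        heap = heaps[domain]
        if len(heap) < n:
            heappush(heap, entry)       # heap not full yet: just add
        else:
            heappushpop(heap, entry)    # push, then evict the smallest of the n+1
    return {d: sorted(h, reverse=True) for d, h in heaps.items()}
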
import datetime
from operator import attrgetter

from ics import Calendar, Event

BREAK_BEGIN = datetime.date(2020, 12, 28)

def runs_over_break(start_date):
    # a course is assumed to span 15 weeks and 5 days from its start date
    return (BREAK_BEGIN > start_date and
            BREAK_BEGIN < start_date + datetime.timedelta(weeks=15, days=5))
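
The rest of the gist is cut off, so the imported `Calendar` and `Event` are never used. A hedged sketch of how they might be wired up with the `ics` package; the event name and output filename are purely hypothetical:

# illustrative only: emit a one-event calendar flagging the break start
c = Calendar()
e = Event()
e.name = "Winter break begins"        # hypothetical event name
e.begin = BREAK_BEGIN.isoformat()
c.events.add(e)
with open('break.ics', 'w') as f:     # hypothetical output file
    f.write(c.serialize())
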
-- redshift
-- COMMAND LINE CONNECT:
--   seatgeek:
--     psql "host=redshift-datawarehouse.service.seatgeek.prod dbname=sganalytic user=andy port=5439"
--   greenhouse:
--     psql "host=redshift.greenhouse.io dbname=greenhouse user=org_978 port=5439 sslmode=require"
-- CREDENTIALS LINE:
CREDENTIALS 'aws_iam_role=arn:aws:iam::093535234988:role/Production-RedshiftCopyUnload'
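
That CREDENTIALS clause plugs into Redshift COPY/UNLOAD statements. A hedged example of using it from Python; the table name and S3 path are placeholders, not from the notes:

# hypothetical COPY using the IAM-role credentials line above;
# `some_table` and the S3 bucket are placeholders
import psycopg2

conn = psycopg2.connect("host=redshift.greenhouse.io dbname=greenhouse "
                        "user=org_978 port=5439 sslmode=require")
with conn, conn.cursor() as cur:
    cur.execute("""
        COPY some_table
        FROM 's3://example-bucket/some_table/'
        CREDENTIALS 'aws_iam_role=arn:aws:iam::093535234988:role/Production-RedshiftCopyUnload'
        FORMAT AS CSV;
    """)
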
# from http://blogs.quovantis.com/how-to-convert-csv-to-parquet-files/
from pyspark import SparkContext
from pyspark.sql import SQLContext
from pyspark.sql.types import StructField, StructType, IntegerType, StringType, TimestampType

if __name__ == "__main__":
    sc = SparkContext(appName="CSV2Parquet")
    sqlContext = SQLContext(sc)
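
The gist stops after creating the SQLContext; a minimal sketch of the conversion step itself, where the schema, column layout, and file paths are assumptions rather than the gist's own:

    # illustrative continuation (not in the gist): schema and paths are assumed
    schema = StructType([
        StructField("id", IntegerType(), True),
        StructField("name", StringType(), True),
    ])
    rdd = (sc.textFile("input.csv")                 # hypothetical input path
             .map(lambda line: line.split(","))
             .map(lambda cols: (int(cols[0]), cols[1])))
    df = sqlContext.createDataFrame(rdd, schema)
    df.write.parquet("output.parquet")              # hypothetical output path
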
create_table = """
DROP TABLE IF EXISTS `dec_test`;
CREATE TABLE `dec_test` (
  `dec_2_2` decimal(4,2),
  `dec_4_2` decimal(6,2),
  `dec_8_4` decimal(8,4),
  `char_15` char(15)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
insert into dec_test values
  (12.34, 1234.56, 1234.5678, 'fifteen chars!!'),  -- illustrative rows (not in the gist), sized to fit each column
  (99.99, 9999.99, 9999.9999, 'x');
"""
import bisect
import datetime
import operator

import boto3

client = boto3.client('cloudwatch')

def get_metric(offset, metricname, threshold, oper='ge', stat='Average'):
    if offset >= 64:
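
The function body is truncated at that `if`. A hedged guess at its shape, based on the signature and CloudWatch's retention tiers (sub-hour datapoints age out after 63 days, so older offsets need a one-hour period); everything past the `if` is an assumption:

# sketch only: the original body is cut off after `if offset >= 64:`
def get_metric(offset, metricname, threshold, oper='ge', stat='Average'):
    period = 3600 if offset >= 64 else 300          # hourly resolution for old data
    end = datetime.datetime.utcnow() - datetime.timedelta(days=offset)
    resp = client.get_metric_statistics(
        Namespace='AWS/EC2',                        # hypothetical namespace
        MetricName=metricname,
        StartTime=end - datetime.timedelta(days=1),
        EndTime=end,
        Period=period,
        Statistics=[stat],
    )
    compare = getattr(operator, oper)               # e.g. operator.ge
    return [p for p in resp['Datapoints'] if compare(p[stat], threshold)]
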
import csv

import requests
from bs4 import BeautifulSoup

base_url = "http://www.pro-football-reference.com"
next_page = "/super-bowl/i.htm"

# this website doesn't have a "next" button to SB XLIX, for some reason
extra = ['/super-bowl/xlix.htm']

def pair(a, b):
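
The gist ends mid-definition of `pair`. A hedged sketch of the crawl loop that `base_url`, `next_page`, and `extra` imply: follow each page's "next" link and visit the listed extras. The CSS selector and the `crawl` function are illustrative assumptions, not from the gist:

# sketch under assumptions: the "next" link selector is a guess
def crawl(start, extras):
    queue = [start] + extras
    seen = set()
    while queue:
        path = queue.pop(0)
        if path in seen:
            continue
        seen.add(path)
        soup = BeautifulSoup(requests.get(base_url + path).text, 'html.parser')
        yield soup
        nxt = soup.select_one('a.button2.next')  # hypothetical selector
        if nxt:
            queue.append(nxt['href'])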