Skip to content

Instantly share code, notes, and snippets.

View mbstacy's full-sized avatar

Mark Stacy mbstacy

  • Harvard University
  • Boston, MA
  • 23:30 (UTC -05:00)
View GitHub Profile
import pandas as pd
import json
# Load the exported JSON. The context manager closes the file handle as soon
# as the contents are read instead of leaving it open.
with open("/Users/mstacy/Downloads/oessgpn.json", "r") as json_file:
    data = json_file.read()
# The records of interest live under the top-level 'results' key.
dflist = json.loads(data)['results']
df = pd.DataFrame(dflist)
# Expand each 'created_by' value into its own columns (via pd.Series) and
# append them, dropping the original column.
# presumably 'created_by' holds dict-like values; verify against the export.
result = pd.concat([df, df['created_by'].apply(pd.Series)], axis=1).drop('created_by', axis=1)
#endpoints
# One row per record, one column per item of its 'endpoints' value.
endps = pd.DataFrame([x for x in result.endpoints])
# assumes every 'endpoints' entry has exactly two items — TODO confirm;
# assigning two column names fails otherwise.
endps.columns = ['end1', 'end2']
# NOTE(review): the value of this expression is not assigned, so it has no
# effect when run as a script (it only displays in an interactive session).
endps.apply(pd.Series)
#!/usr/bin/env python
import os
import sys
def rename(path):
    """Drop the last '-'-separated segment from matching file names in *path*.

    Every directory entry whose name splits into more than two parts on '-'
    is renamed to the name without its final part, with ".zip" appended
    (for example ``a-b-c.zip`` becomes ``a-b.zip``). Other entries are left
    untouched.

    Args:
        path: Directory whose entries are examined and renamed in place.

    Raises:
        OSError: If the directory cannot be listed or a rename fails.
    """
    for f in os.listdir(path):
        flist = f.split('-')
        if len(flist) > 2:
            new_name = "{0}.zip".format("-".join(flist[:-1]))
            # os.listdir returns bare names, so both the source and the target
            # must be joined with `path`; otherwise the rename is resolved
            # against the current working directory.
            os.rename(os.path.join(path, f), os.path.join(path, new_name))

gitPushAdminEnforcement

gitPushAdminEnforcement uses the GitHub API to turn off admin enforcement, run git push, and then re-enable admin enforcement.

Installation:

  1. pip install PyGithub
  2. Copy gitPushAdminEnforcement into a bin directory (e.g., ~/.local/bin)
  3. Add the bin directory to the PATH variable
  4. chmod +x ~/.local/bin/gitPushAdminEnforcement
import xmltodict
import sys
def read_xml(filename):
    """Read *filename* and parse its contents with xmltodict.

    The file text is wrapped in a synthetic ``<root>`` element before
    parsing. Parsing uses 'text' as the character-data key, an empty
    attribute prefix, and plain ``dict`` as the mapping type.
    """
    with open(filename, 'r') as xml_file:
        contents = xml_file.read()
    wrapped = "<root>{0}</root>".format(contents)
    return xmltodict.parse(
        wrapped,
        cdata_key='text',
        attr_prefix='',
        dict_constructor=dict,
    )
def search_subjects(term,doc):
total_hits=0
total_records=0
import os
def makeCamelCase(name,filename=False,removeSpecial=False):
"""
This function produces camelCase variable names or filenames. You can provide options to remove special characters.
ARGS:
name (string)
KWARGS:
filename (Boolean) - default False
removeSpecial (Boolean) - default false
import requests, json, os
from sys import argv
# URL of the cuscholar catalog JSON endpoint.
catalog_url = "https://libapps.colorado.edu/api/catalog/data/catalog/cuscholar.json"
# Request headers: JSON content type plus a token read from the
# LIBAPPS_APITOKEN environment variable.
headers = {"Content-Type": "application/json"}
headers["Authorization"] = "Token {0}".format(os.getenv('LIBAPPS_APITOKEN'))
def get_cuscholar_data():
#query='query={"filter":{},"projection":{"data_files.s3.key":1,"title":1,"_id":0}}'
#url = "{0}?page_size=100&{1}".format(catalog_url,query)
import pandas as pd
#error file generated in bash: $cat dm-ir.log | awk '{print $5,$8}' > errors2.txt
# sep=r'\s+' splits fields on runs of whitespace. It is the documented
# replacement for delim_whitespace=True, which pandas deprecated in 2.2.
err = pd.read_csv('errors2.txt', sep=r'\s+', header=None)
err.columns = ['error_type', 'context_key']
err = err.drop_duplicates()
#Main inventory
# The converters apply str to the columns at positions 0 through 82.
df = pd.read_csv('data/inventory-2019-05-30.csv', converters={i: str for i in range(0, 83)})
df = df.drop_duplicates()
@mbstacy
mbstacy / EKSvsEC2.md
Last active November 22, 2025 13:37
EKS vs EC2 Kubernetes Costs

Kubernetes Cost Comparison EC2 vs EKS

EKS Kubernetes

The AWS EKS service provides the Kubernetes control plane. The price of the EKS service is $0.20 per hour per cluster.

EKS Use Case Monthly Cost

Three clusters with four worker nodes.

import PyPDF2
from sys import argv
def getTextPdf(filename):
'''
Reads entire file and returns text
'''
#open allows you to read the file
with open(filename,'rb') as pdfFileObj:
#The pdfReader variable is a readable object that will be parsed