Joel Klinger jaklinger

@jaklinger
jaklinger / terminate_running.py
Created May 21, 2018 10:58
Terminate all RUNNING and RUNNABLE AWS Batch jobs with boto3
import time
import boto3

client = boto3.client('batch')
i = 0
for job in client.list_jobs(jobQueue="HighPriority", jobStatus="RUNNABLE")["jobSummaryList"]:
    i += 1
    print(job["jobId"])
    client.terminate_job(jobId=job["jobId"], reason="Too slow")
    time.sleep(0.2)
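The gist title covers both RUNNING and RUNNABLE jobs, while the preview above only loops over RUNNABLE. A minimal sketch of how the same pattern might extend to both statuses, reusing the queue name and sleep interval from the snippet (pagination via nextToken is ignored here):

import time
import boto3

client = boto3.client('batch')
for status in ("RUNNING", "RUNNABLE"):
    # Assumed extension: repeat the listing/termination loop for each status
    for job in client.list_jobs(jobQueue="HighPriority", jobStatus=status)["jobSummaryList"]:
        print(job["jobId"])
        client.terminate_job(jobId=job["jobId"], reason="Too slow")
        time.sleep(0.2)  # Brief pause to avoid hammering the Batch API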
@jaklinger
jaklinger / strapper.py
Created June 15, 2018 11:10
Randomly chunk up an iterable, useful for sampling efficiently
import random
def chunks(whole, n_chunks):
    '''Randomly chunk up an iterable'''
    # Make sure that it makes sense to chunk up the object
    if n_chunks > len(whole) or n_chunks <= 0:
        yield whole
        return
    # Copy the iterable (we'll delete it later anyway) and shuffle it
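The preview is cut off after the shuffle comment. One plausible way the generator could finish, sketched here on the assumption that it shuffles a copy of the input and yields roughly equal slices (the shuffle-and-slice details are guesses, not the original code):

import random

def chunks(whole, n_chunks):
    '''Randomly chunk up an iterable'''
    if n_chunks > len(whole) or n_chunks <= 0:
        yield whole
        return
    # Assumed continuation: shuffle a copy, then yield roughly equal slices
    shuffled = list(whole)
    random.shuffle(shuffled)
    chunk_size = len(shuffled) // n_chunks
    for i in range(n_chunks):
        start = i * chunk_size
        # The last chunk absorbs any remainder
        end = start + chunk_size if i < n_chunks - 1 else len(shuffled)
        yield shuffled[start:end]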
@jaklinger
jaklinger / luigi_ec2_setup
Created June 29, 2018 13:46
Instructions for setting up a Luigi server on AWS EC2
Note: THIS IS NOT A SHELL SCRIPT
cd /dev/shm
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
bash Miniconda3-latest-Linux-x86_64.sh
<LOG OUT AND IN>
conda create --name py36 python=3.6
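Once luigid is running on the instance, tasks can be scheduled against it from any client. A minimal Python sketch, assuming a hypothetical MyTask, that luigi.build accepts the scheduler_host/scheduler_port overrides from Luigi's core config, and that the scheduler listens on the default port 8082:

import luigi

class MyTask(luigi.Task):  # Hypothetical task, for illustration only
    def run(self):
        pass

if __name__ == "__main__":
    # Point the worker at the central scheduler on the EC2 instance
    luigi.build([MyTask()],
                scheduler_host="ec2-xx-xx-xx-xx.compute.amazonaws.com",  # Placeholder hostname
                scheduler_port=8082)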
@jaklinger
jaklinger / papers_by_fos.py
Created July 5, 2018 09:02
Get papers from the Open Academic Graph by field of study (FOS)
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.sql import text as sql_text
from collections import defaultdict
with open('/Users/jklinger/Nesta-AWS/AWS-RDS-config/open-academic-graph.config') as f:
    host, port, database, user, password = f.read().split(':')
database_uri = 'postgresql://{}:{}@{}/{}'.format(user, password, host, "microsoft_academic_graph")
con = create_engine(database_uri)
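The preview stops once the engine is created. A short sketch of the kind of query the title implies, pulling papers for one field of study into pandas; the table and column names here are assumptions, not taken from the gist:

fos = "machine learning"  # Example field of study
query = sql_text("SELECT * FROM papers WHERE fos = :fos")  # Assumed table/column names
papers = pd.read_sql(query, con, params={"fos": fos})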
@jaklinger
jaklinger / api_hitter.html
Created July 19, 2018 13:12
Reminder to self about how to hit my own Flask API (to clio)
<!DOCTYPE html>
<html>
<body>
<!-- The search bar -->
<input type='text' id="query_data"/>
<button onclick="hitAPI()"> Submit </button>
<br>
<!-- Where the response will end up -->
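The preview stops before the response area and the hitAPI script. The page needs a Flask endpoint to talk to; a minimal sketch of what the server side might look like, where the route, parameter name, and response shape are illustrative assumptions rather than the gist's actual API:

from flask import Flask, jsonify, request

app = Flask(__name__)

@app.route("/search")  # Hypothetical route name
def search():
    query = request.args.get("q", "")  # Hypothetical query parameter
    # Echo the query back with an empty result list as a stand-in response
    return jsonify({"query": query, "results": []})

if __name__ == "__main__":
    app.run(port=5000)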
from itertools import chain, combinations

def all_subsets(n):
    return chain(*map(lambda x: combinations(range(0, n), x), range(2, n+1)))

def subset_matrix(n):
    rows = []
    for subset in all_subsets(n):
        new_row = [0]*n
        for i in subset:
            new_row[i] = 1
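The preview above cuts off mid-function. A sketch of how subset_matrix might finish, assuming each indicator row is simply collected and the list of rows returned:

def subset_matrix(n):
    rows = []
    for subset in all_subsets(n):
        new_row = [0]*n
        for i in subset:
            new_row[i] = 1
        rows.append(new_row)  # Assumed continuation: collect each indicator row
    return rows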
@jaklinger
jaklinger / scan_elasticsearch.py
Created August 28, 2018 15:55
Getting all documents from an Elasticsearch database. Note this method doesn't rank results in any way. For that, you should use the search API, and accept that you will get the top N results.
'''Getting all documents from an Elasticsearch database.
Note this method doesn't rank results in any way. For that, you should
use the search API, and accept that you will get the top N results.'''
from elasticsearch.helpers import scan
from elasticsearch import Elasticsearch
ENDPOINT = "" # <=== Enter an endpoint URI here
es = Elasticsearch(ENDPOINT, index="rwjf", doc_type="world_reporter")
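The preview stops after the client is created. A sketch of how the scan helper is typically used to walk every document; the match_all query and loop body are assumptions, while the index and doc_type values mirror the snippet:

results = scan(es,
               index="rwjf",
               doc_type="world_reporter",
               query={"query": {"match_all": {}}})
for doc in results:
    # Each hit carries its id and the stored document body
    print(doc["_id"], doc["_source"])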
@jaklinger
jaklinger / s3_to_pandas.py
Last active April 7, 2019 03:01
Read a CSV (or JSON, etc.) from AWS S3 into a pandas DataFrame
import boto3
import pandas as pd
from io import BytesIO
bucket, filename = "bucket_name", "filename.csv"
s3 = boto3.resource('s3')
obj = s3.Object(bucket, filename)
with BytesIO(obj.get()['Body'].read()) as bio:
    df = pd.read_csv(bio)
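As the title suggests, the same pattern handles other formats by swapping the reader; a minimal variant for newline-delimited JSON, where the key name is a hypothetical example:

obj = s3.Object(bucket, "filename.json")  # Hypothetical JSON key
with BytesIO(obj.get()['Body'].read()) as bio:
    df = pd.read_json(bio, lines=True)  # lines=True for newline-delimited JSON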
@jaklinger
jaklinger / json_to_s3.py
Created November 7, 2018 13:04
Write JSON to S3
import boto3
import json
from io import StringIO
s3 = boto3.resource("s3")
data = [{"a":1, "c":3},{"b":2}]
s3_obj = s3.Object(<BUCKET NAME>, <FILE NAME>)
s3_obj.put(Body=json.dumps(data))
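To read the data back, the same object can be fetched and decoded; a short sketch using hypothetical bucket and key names in place of the placeholders above:

s3_obj = s3.Object("my-bucket", "data.json")  # Hypothetical bucket/key names
data = json.loads(s3_obj.get()["Body"].read())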
@jaklinger
jaklinger / s3_bucket_to_pandas.py
Created November 7, 2018 14:01
Read the entire contents of an S3 bucket into a pandas DataFrame
import boto3
import pandas as pd
from io import BytesIO
bucket = "innovation-mapping-general"
directory = "nih_all_processed_data/"
s3 = boto3.resource('s3')
dfs = []
for key in s3.Bucket(bucket).objects.all():
    if not key.key.startswith(directory):
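The preview is cut off inside the loop. A sketch of how it plausibly continues: skip keys outside the directory, read each remaining object into a dataframe, and concatenate at the end (the continue/concat details are assumptions, not the original code):

for key in s3.Bucket(bucket).objects.all():
    if not key.key.startswith(directory):
        continue  # Skip objects outside the target directory
    with BytesIO(key.get()['Body'].read()) as bio:
        dfs.append(pd.read_csv(bio))
df = pd.concat(dfs)  # One dataframe for the whole bucket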