Skip to content

Instantly share code, notes, and snippets.

View jaklinger's full-sized avatar

Joel Klinger jaklinger

View GitHub Profile
@jaklinger
jaklinger / get_calender_info.py
Last active October 28, 2021 09:32
Read Google Calendar info
import datetime
from googleapiclient.discovery import build
from httplib2 import Http
from oauth2client import file, client, tools
from collections import defaultdict
import json
from googleapiclient.errors import HttpError
# If modifying these scopes, delete the file token.json.
SCOPES = 'https://www.googleapis.com/auth/calendar.readonly'
@jaklinger
jaklinger / prepare_title_for_microsoft_academic_graph.py
Created February 28, 2019 11:01
Prepare paper titles for Microsoft Academic Graph
from alphabet_detector import AlphabetDetector
ALPHABET_DETECTOR = AlphabetDetector()
def prepare_title(title):
# Replace non-alphanums (allowing foreign characters)
result = "".join([x
if len(ALPHABET_DETECTOR.detect_alphabet(x)) > 0
or x.isnumeric()
else " " for x in title.lower()])
@jaklinger
jaklinger / es_keyword_expansion.py
Last active March 4, 2020 22:25
How to get keyword expansion using elasticsearch
import requests
import json
def make_query(url, q, alg, field, shard_size=1000, size=25):
"""Get keywords relating to the input query, directly from Elasticsearch
Args:
url (str): The Elasticsearch endpoint you want to query
q (str): The query you want to retrieve keywords for
alg (str): An algorithm from https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-significantterms-aggregation.html#_parameters
@jaklinger
jaklinger / ngrams.py
Created September 5, 2019 09:06
Getting Nesta ngram data from the database
from sqlalchemy import create_engine
from sqlalchemy.engine.url import URL
from configparser import ConfigParser
import pandas as pd
def get_engine(config_path, database="production", **engine_kwargs):
'''Get a SQL alchemy engine from config'''
cp = ConfigParser()
cp.read(config_path)
cp = cp["client"]
@jaklinger
jaklinger / placename_hierarchy.py
Created September 22, 2019 11:16
Create British name lookup (no Northern Ireland)
import pandas as pd
import os
import json
TOPDIR = "path/to/opname_csv_gb/"
HEADER_PATH = os.path.join(TOPDIR,'DOC','OS_Open_Names_Header.csv')
def extract_tree(df, levels, ilvl=0):
lvl = levels[ilvl]
entities = []
@jaklinger
jaklinger / mongo_to_files.py
Created September 26, 2019 12:50
Write data from mongodb to json files, in chunks.
import pymongo
from pymongo import MongoClient
import pandas as pd
def mongo_to_files(usr_name, pwd, address, db_name, collection,
out_path='out{}-{}.json', chunk_size=10000, port=27017):
client = MongoClient(f'mongodb://{usr_name}:{pwd}@{address}', port)
db = client[db_name]
collection = db[collection]
total = collection.count()
@jaklinger
jaklinger / make_users.sh
Created October 2, 2019 10:44
Create users with a dummy password, cloned from the current user
ALL_USERS="jklinger sgarasto jdjumali csleeman rleung"
TOPDIR=$PWD
for USERNAME in $ALL_USERS;
do
cd $TOPDIR
sudo useradd --create-home $USERNAME
echo "$USERNAME" | sudo passwd --stdin $USERNAME
sudo usermod -aG ec2-user $USERNAME
sudo usermod -aG wheel $USERNAME
sudo cp -rp $HOME/* /home/$USERNAME/
@jaklinger
jaklinger / cooc.sql
Created October 2, 2019 13:21
MySQL co-occurrences from a link table (example treats docs as binarised vector of skills)
CREATE TEMPORARY TABLE tmp_skills_a
PRIMARY key doc_id
SELECT doc_id, skill_id
FROM skills_link_table
GROUP BY doc_id, skill_id;
CREATE TEMPORARY TABLE tmp_skills_b
PRIMARY key doc_id
SELECT doc_id, skill_id
FROM skills_link_table
@jaklinger
jaklinger / assym.py
Created November 1, 2019 16:43
Asymmetry measurement of a square matrix
import numpy as np
def assym(a):
    """Measure how asymmetric a square matrix is.

    The score is ``1 - det(S) / det(a)``, where ``S = 0.5 * (a + a.T)`` is
    the symmetric part of ``a``. A symmetric matrix has ``S == a`` and
    therefore scores 0.

    Args:
        a: Square 2-D array-like: nested lists, ``np.ndarray`` or
            ``np.matrix``.

    Returns:
        The asymmetry score as a float.

    Note:
        There is no guard against ``det(a) == 0``; in that case the
        division is left to numpy and the result is not finite.
    """
    # Coerce once so nested lists work too (they have no ``.T`` attribute);
    # ndarray / matrix inputs yield the same determinants as before.
    mat = np.asarray(a)
    symmetric_part = 0.5 * (mat + mat.T)
    return 1 - (np.linalg.det(symmetric_part) / np.linalg.det(mat))
for a in ([[10,123,0],[123,10,0],[0,0,10]], [[10,123,0],[121,10,0],[0,0,10]],
[[10,123,0],[50,10,0],[0,0,10]], [[10,123,0],[0,10,0],[23,0,10]],
[[10,123,0],[-123,10,0],[5422,0,10]]):
a = np.matrix(a)
print(a)
@jaklinger
jaklinger / after_decorator.py
Created May 5, 2020 15:42
after decorator example
def do_the_other_thing(run, output):
    """Build a method that calls ``run`` and then ``output`` on the same object.

    Args:
        run: Callable taking the instance; invoked first.
        output: Callable taking the instance; invoked after ``run`` returns.

    Returns:
        A one-argument function ``wrap(self)`` that performs both calls in
        order and returns ``None``. It carries the metadata of ``run``
        (``__name__``, ``__doc__``, ``__wrapped__``).
    """
    # Local import keeps this block self-contained.
    from functools import wraps

    @wraps(run)
    def wrap(self):
        run(self)
        output(self)

    return wrap
class A:
name='a'
def run(self):
pass