Skip to content

Instantly share code, notes, and snippets.

View interrogator's full-sized avatar

Daniel interrogator

  • UZH
  • Zurich, Switzerland
View GitHub Profile
@interrogator
interrogator / keywording.py
Last active August 29, 2015 14:23
keywording for eugene to look at
# keyword calculation for a particular word
# our aim is to use log likelihood to calculate the 'keyness' of a word. common practice.
# a normal application would be to loop through wordlists and give every word a keyness score.
# here, we'll just get 'apple', as an example
# reference_corpus = a dictionary of words and their frequencies in a large dataset
# target_corpus = a dictionary of words and their frequencies in a smaller dataset
# our example word
from setuptools import setup, find_packages
from setuptools.command.install import install
class install_with_nltk_extras(install):
    """Customized setuptools install command that also fetches NLTK data.

    After the standard install step has run, the NLTK 'punkt' and
    'wordnet' resources are downloaded.
    """

    def run(self):
        """Run the regular install, then download the NLTK resources."""
        install.run(self)
        # nltk is imported only after the install step has run
        # (presumably because it may not be importable before then - confirm)
        import nltk
        nltk.download('punkt')
        nltk.download('wordnet')
% gobble allows you to indent in your tex file
% rerun code only if it changes
\usepackage[gobble=auto,rerun=modified]{pythontex}
% a figure showing code and the image it produces
\begin{figure}[htb!]
% make a text box
\begin{mdframed}[backgroundcolor=gray!4] \footnotesize \singlespacing
import pickle
import os
# NOTE: unpickling can execute arbitrary code - only load 'bnc.p' from a trusted source
with open('bnc.p', 'rb') as f:
    bnc = pickle.load(f)
lst = ['clinic', 'hospital', 'patient']
# report the stored value for each word (indexing bnc by word)
for w in lst:
    print('%s: %d occurrences' % (w, bnc[w]))
LET us go then, you and I,
When the evening is spread out against the sky
Like a patient etherized upon a table;
Let us go, through certain half-deserted streets,
The muttering retreats 5
Of restless nights in one-night cheap hotels
And sawdust restaurants with oyster-shells:
Streets that follow like a tedious argument
Of insidious intent
To lead you to an overwhelming question…. 10
#!/usr/bin/python
def nextbus(where = 'u', walking_time = 3, nbus = 5):
"""tells you when the next bus is from home or to uni
where: 'h'/'u'
walking_time: don't show buses less than n mins from now
nbus: show next n buses
"""
walking_time = int(walking_time)
@interrogator
interrogator / bus.py
Last active May 16, 2016 16:55
Tells you when the next bus is coming
#!/usr/bin/python
def nextbus(where = 'guess', nbus = 5, walking_time = 3):
"""
tells you when the next bus is from home or to uni
where: 'h'/'u'/'guess': home, uni, or try to guess based on wifi connection name
walking_time: don't show buses less than n mins from now
nbus: show next n buses
@interrogator
interrogator / sorter.py
Created February 8, 2016 15:47
sort a list of numbers in one line
# a list of ints
l = [4, 2, 6, 7, 8, 1, 50, 23, 13, 55, 12, 3]
# one line to sort them!
# for each position, pull the smallest remaining value forward; the index
# search starts at ind so an equal value already placed in the sorted
# prefix is never picked up again (keeps lists with duplicates correct)
[l.insert(ind, l.pop(l.index(min(l[ind:]), ind))) for ind in range(len(l))]
# see?
print(l)
# elaborated code
from corpkit import *
# NOTE(review): this rebinds `corpus` to the call's return value, so the
# callable of the same name (presumably from the wildcard import - confirm)
# is no longer reachable under that name afterwards
corpus = corpus('corpus_name')
# create a language model from the corpus; 'modelname' is presumably the
# name it is stored under - confirm against the corpkit documentation
langmod = corpus.make_language_model('modelname')
# score string
langmod.score('Check similarity for this text to each corpus')
# score file
# picks files[1] of subcorpora[5]; assumes the corpus has at least that
# many subcorpora and files - TODO confirm
corpus_file = corpus.subcorpora[5].files[1]
langmod.score(corpus_file)
@interrogator
interrogator / dendo.py
Last active June 7, 2016 12:05
dendrogramming
%matplotlib notebook
import seaborn as sns
import numpy as np
from scipy.spatial.distance import pdist
from scipy.cluster.hierarchy import linkage, dendrogram
# pdist can be 'braycurtis', 'canberra', 'chebyshev', 'cityblock', 'correlation', 'cosine',
# 'dice', 'euclidean', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'matching',
#'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'