This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# keyword calculation for a particular word | |
# our aim is to use log likelihood to calculate the 'keyness' of a word, which is common practice.
# a normal application would be to loop through wordlists and give every word a keyness score.
# here, we'll just get 'apple', as an example | |
# reference_corpus = a dictionary of words and their frequencies in a large dataset | |
# target_corpus = a dictionary of words and their frequencies in a smaller dataset | |
# our example word |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from setuptools import setup, find_packages | |
from setuptools.command.install import install | |
class install_with_nltk_extras(install):
    """Customized setuptools install command that also fetches NLTK data.

    Runs the standard ``install`` command and then downloads the NLTK
    ``punkt`` and ``wordnet`` resources.
    """

    def run(self):
        """Run the regular install step, then download the NLTK resources."""
        install.run(self)
        # Imported here rather than at module level -- presumably because
        # nltk may only be importable once the install step has run
        # (TODO confirm).
        import nltk
        nltk.download('punkt')
        nltk.download('wordnet')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
% gobble allows you to indent in your tex file | |
% rerun code only if it changes | |
\usepackage[gobble=auto,rerun=modified]{pythontex} | |
% a figure showing code and the image it produces | |
\begin{figure}[htb!] | |
% make a text box | |
\begin{mdframed}[backgroundcolor=gray!4] \footnotesize \singlespacing |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pickle | |
import os | |
# Load the pickled mapping of words to counts (it is indexed by word and
# formatted with %d below). The context manager closes the file handle.
# NOTE(review): pickle can execute arbitrary code while loading; only
# unpickle 'bnc.p' if it comes from a trusted source.
with open('bnc.p', 'rb') as handle:
    bnc = pickle.load(handle)

# Report the stored count for each word of interest.
lst = ['clinic', 'hospital', 'patient']
for w in lst:
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print('%s: %d occurrences' % (w, bnc[w]))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
LET us go then, you and I, | |
When the evening is spread out against the sky | |
Like a patient etherized upon a table; | |
Let us go, through certain half-deserted streets, | |
The muttering retreats 5 | |
Of restless nights in one-night cheap hotels | |
And sawdust restaurants with oyster-shells: | |
Streets that follow like a tedious argument | |
Of insidious intent | |
To lead you to an overwhelming question…. 10 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
def nextbus(where = 'u', walking_time = 3, nbus = 5): | |
"""tells you when the next bus is from home or to uni | |
where: 'h'/'u' | |
walking_time: don't show buses less than n mins from now | |
nbus: show next n buses | |
""" | |
walking_time = int(walking_time) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
def nextbus(where = 'guess', nbus = 5, walking_time = 3): | |
""" | |
tells you when the next bus is from home or to uni | |
where: 'h'/'u'/'guess': home, uni, or try to guess based on wifi connection name | |
walking_time: don't show buses less than n mins from now | |
nbus: show next n buses |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# a list of ints
l = [4, 2, 6, 7, 8, 1, 50, 23, 13, 55, 12, 3]


def selection_sort_in_place(items):
    """Sort *items* into ascending order in place using selection sort.

    For each position ``ind``, the smallest value of ``items[ind:]`` is
    popped from the list and re-inserted at ``ind``. Returns ``None``;
    the list passed in is modified.
    """
    for ind in range(len(items)):
        # Start the search at ``ind``: a search over the whole list can hit
        # an equal value inside the already-sorted prefix, and moving that
        # one un-sorts lists containing duplicates (e.g. [1, 2, 1]).
        smallest_at = items.index(min(items[ind:]), ind)
        items.insert(ind, items.pop(smallest_at))


# sort them!
selection_sort_in_place(l)
# see?
print(l)
# elaborated code |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from corpkit import *

# Build a corpus object from its name.
# NOTE(review): this rebinds the name `corpus`, so whatever the wildcard
# import bound to it is no longer reachable afterwards. Possibly the
# capitalised `Corpus` class was intended -- confirm against the corpkit API.
corpus = corpus('corpus_name')

# Create a language model from the corpus.
langmod = corpus.make_language_model('modelname')

# score string
langmod.score('Check similarity for this text to each corpus')

# score file
corpus_file = corpus.subcorpora[5].files[1]
langmod.score(corpus_file)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%matplotlib notebook | |
import seaborn as sns | |
import numpy as np | |
from scipy.spatial.distance import pdist | |
from scipy.cluster.hierarchy import linkage, dendrogram | |
# pdist can be 'braycurtis', 'canberra', 'chebyshev', 'cityblock', 'correlation', 'cosine', | |
# 'dice', 'euclidean', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'matching', | |
#'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule' |