This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# stable version hosted on gist at: https://gist.github.com/alessiacoccato/a1f9b9ff5530bfcdcf084b77513358b2 | |
import os | |
import sys | |
import subprocess | |
import logging | |
from contextlib import contextmanager | |
from io import StringIO | |
INSTALLED = False |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# !pip install humanfriendly loguru | |
from humanfriendly import format_timespan | |
from loguru import logger | |
import time | |
class Logwatch: | |
def __init__(self, name='Task', level='DEBUG'): | |
self.started = None | |
self.ended = None |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import sys,os,bs4 | |
def nb2py(fn): | |
if not os.path.exists(fn): return | |
os.system(f'jupyter nbconvert --to markdown {fn}') | |
fn_md=os.path.splitext(fn)[0]+'.md' | |
if not os.path.exists(fn_md): return | |
with open(fn_md) as f: txt=f.read() | |
dom=bs4.BeautifulSoup(txt,'lxml') |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Simple mofo'n parallelism with progress bar. Born of frustration with p_tqdm. | |
""" | |
def pmap_do(inp): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# install | |
# pip install bs4 fulltext epub-conversion pymupdf requests xml_cleaner html2text kitchen -q | |
# imports | |
import os | |
from kitchen.text.converters import to_unicode | |
# constants | |
WORKING_EXTS={'txt','pdf','epub','html','xml','htm'} | |
CONTENT_TAGS={'xml':['p'],'html':['p'],'htm':['p'],'epub':['p']} |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def draw_graph3(networkx_graph,notebook=True,output_filename='graph.html',show_buttons=True,only_physics_buttons=False): | |
""" | |
This function accepts a networkx graph object, | |
converts it to a pyvis network object preserving its node and edge attributes, | |
and both returns and saves a dynamic network visualization. | |
Valid node attributes include: | |
"size", "value", "title", "x", "y", "label", "color". | |
(For more info: https://pyvis.readthedocs.io/en/latest/documentation.html#pyvis.network.Network.add_node) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def measure_semantic_shift_by_neighborhood(model1,model2,word,k=25,verbose=False): | |
""" | |
Basic implementation of William Hamilton (@williamleif) et al's measure of semantic change | |
proposed in their paper "Cultural Shift or Linguistic Drift?" (https://arxiv.org/abs/1606.02821), | |
which they call the "local neighborhood measure." They find this measure better suited to understand | |
the semantic change of nouns owing to "cultural shift," or changes in meaning "local" to that word, | |
rather than global changes in language use ("linguistic drift") that are better suited to a | |
Procrustes-alignment method (also described in the same paper). | |
Arguments are: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Code to make a network out of the shortest N cosine-distances (or, equivalently, the strongest N associations) | |
between a set of words in a gensim word2vec model. | |
To use: | |
Set the filenames for the word2vec model. | |
Set `my_words` to be a list of your own choosing. | |
Set `num_top_dists` to be a number or a factor of the length of `my_words`. | |
Choose between the two methods below to produce distances, and comment-out the other one. | |
""" |
NewerOlder