Skip to content

Instantly share code, notes, and snippets.

View urigoren's full-sized avatar

Uri Goren urigoren

View GitHub Profile
@urigoren
urigoren / .vimrc
Last active November 28, 2021 12:27
" Open new horizontal splits below the current window.
set splitbelow
" F4 (normal mode): toggle search highlighting, then query the option so its
" current state is echoed.
nnoremap <F4> :set hlsearch! nohlsearch?<CR>
" F4 (insert mode): run the normal-mode F4 mapping for one command via Ctrl-O.
" Uses imap (not inoremap) on purpose so the inner <F4> is remapped.
imap <F4> <C-O><F4>
" F2 (normal mode): overwrite the search register with a placeholder string
" (presumably chosen because it matches nothing, so existing highlights
" disappear -- confirm), then toggle 'paste' and echo its new value.
nnoremap <F2> :let @/="qoXouQoz"<CR>:set invpaste paste?<CR>
" Also register F2 as the 'paste' toggle key.
set pastetoggle=<F2>
" F3: append the word under the cursor to the current search pattern as an
" extra whole-word alternative (\|\<word\>), then jump to the next match.
nnoremap <silent> <F3> :let @/ .= '\\|\<'.expand('<cword>').'\>'<cr>n
" F5 (buffer-local): write the buffer, then run the current file with
" /usr/bin/python3; the file name is shell-escaped before being passed to :!.
nnoremap <buffer> <F5> :w<cr>:exec '!/usr/bin/python3' shellescape(@%, 1)<cr>
" Enable syntax highlighting.
syntax on
import json, boto3
# Bucket handle shared by both helpers below.
s3 = boto3.resource("s3").Bucket("bucket")


def _load_s3(f):
    """Fetch the object stored under key *f* and parse its body as JSON."""
    response = s3.Object(key=f).get()
    return json.load(response["Body"])


def _dump_s3(obj, f):
    """Serialize *obj* to JSON and upload it under key *f*.

    Returns whatever the underlying ``put`` call returns.
    """
    payload = json.dumps(obj)
    return s3.Object(key=f).put(Body=payload)


# Expose the helpers as json.load_s3 / json.dump_s3, next to json.load / json.dump.
json.load_s3 = _load_s3
json.dump_s3 = _dump_s3
@urigoren
urigoren / word2vec_train.py
Last active October 24, 2022 15:07
A command-line script that trains word2vec on all text files in a directory using Gensim and a predefined vocabulary.
import sys, random, json
from gensim.models import Word2Vec
from argparse import ArgumentParser
from collections import Counter
from pathlib import Path
__dir__= Path(__file__).absolute().parent
class DirSentences(object):
import sys, os, json, subprocess
from argparse import ArgumentParser
__dir__ = os.path.dirname(os.path.abspath(__file__))
def shell(cmd):
"""Run bash command"""
process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = process.communicate()
categorical_data = [
(0,1,2),
(0,1),
(0,1,3),
(0,1,3),
(0,1,3),
(0,1,2,3),
(2, 3),
(2, 3),
(2, 3),
@urigoren
urigoren / json2csv.py
Created September 26, 2018 21:49
A simple command-line tool that converts a JSON file of the form `{"word": count}` into a readable CSV file.
import json
import sys
from operator import itemgetter as at


def json2csv(fname):
    """Convert a ``{"word": count}`` JSON file into a CSV file beside it.

    The output has a ``key,val`` header followed by one row per entry,
    ordered by value from largest to smallest. Keys are always wrapped in
    double quotes, with embedded double quotes doubled.

    Args:
        fname: Path of the input file; must end with ``.json``.

    Returns:
        The path of the CSV file that was written.

    Raises:
        ValueError: If *fname* does not end with ``.json``.
    """
    suffix = '.json'
    if not fname.endswith(suffix):
        # Explicit check instead of ``assert``, which disappears under -O.
        raise ValueError('expected a .json file, got: {}'.format(fname))
    # Swap only the trailing extension. ``str.replace`` would also rewrite
    # any earlier ".json" in the path (e.g. a directory named "x.json.d").
    out_name = fname[:-len(suffix)] + '.csv'

    with open(fname, 'r', encoding='utf-8') as f:
        d = json.load(f)

    lines = ['{k},{v}\n'.format(k="key", v="val")]
    for k, v in sorted(d.items(), key=at(1), reverse=True):
        lines.append('"{k}",{v}\n'.format(k=k.replace('"', '""'), v=v))

    with open(out_name, 'w', encoding='utf-8') as f:
        f.writelines(lines)
    return out_name


def main(argv=None):
    """Command-line entry point: ``json2csv.py FILE.json``."""
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.exit('usage: json2csv.py FILE.json')
    try:
        json2csv(args[0])
    except ValueError as err:
        sys.exit(str(err))


if __name__ == '__main__':
    main()
import re
from html import unescape
def html2text(htm):
ret = unescape(htm)
ret = ret.translate({
8209: ord('-'),
ord('`'): ord("'"),
ord('’'): ord("'"),
8220: ord('"'),
8221: ord('"'),
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
"""
A Python wrapper for the icount.co.il API
https://www.icount.co.il/api-v3/
"""
import json
from urllib import request, parse
def post(url, data):
req = request.Request(url, data=parse.urlencode(data).encode())
import numpy as np
import collections, itertools, string
from scipy.cluster import hierarchy
from scipy.spatial import distance
from sklearn.feature_extraction import text
from editdistance import distance as editdistance
def edit_pdist(toks, normalize=False):
"""Return pairwise editdistance matrix"""
n = len(toks)