Skip to content

Instantly share code, notes, and snippets.

View lambdaofgod's full-sized avatar
🤗
opensource

Jakub Bartczuk lambdaofgod

🤗
opensource
View GitHub Profile
import torch
import ot
from sklearn import metrics
# Fetch the 'roberta.large' entry point from the 'pytorch/fairseq' hub repository.
roberta = torch.hub.load('pytorch/fairseq', 'roberta.large')
# Switch the loaded model to eval mode before it is used below.
roberta.eval() # disable dropout (or leave in train mode to finetune)
def get_roberta_features(text):
import tqdm
def embed_columns(df, input_columns, embedder, batch_size=8):
"""
embedder: tf.hub module
"""
embeddings = {}
for text in tqdm.tqdm(input_columns):
# Add this to your shell rc file if you want to start (or re-attach to) a
# dedicated tmux session named "guild" with the "guild" conda env activated.
# Requires autojump, available as `j`.
#
# Usage: tmux_guild <autojump-query>
tmux_guild() {
    # Jump to the directory matching "$1" so a new session starts there.
    j "$1"
    # Create and initialise the session only when it does not exist yet.
    # Re-running `tmux new` with a taken name fails with "duplicate session",
    # and the send-keys call would then type the activate command into
    # whatever the existing session is currently running.
    # The "=" prefix asks tmux for an exact session-name match.
    if ! tmux has-session -t "=guild" 2>/dev/null; then
        tmux new -d -s guild
        tmux send-keys -t guild "conda activate guild" Enter
    fi
    tmux a -t guild
}
@lambdaofgod
lambdaofgod / cloud_function.py
Last active February 24, 2020 09:54
simple search engine based on BM25
import rank_bm25
import pandas as pd
import numpy as np
class SearchEngine:
def __init__(self, df, bm25_cls=rank_bm25.BM25Okapi, text_col='text'):
self.df = df[~df[text_col].isna()]
self.bm25 = bm25_cls(self.df[text_col].str.split())
@lambdaofgod
lambdaofgod / get_everything_from_index.py
Created November 4, 2019 15:36
Get all records from an Elasticsearch index
from elasticsearch_dsl import Search
def get_everything_from_index(es, index):
    """Lazily yield every hit of a scan over ``index`` as a dict.

    ``es`` is the client handed to ``Search.using``; ``index`` is the index
    name handed to ``Search``. Each hit is converted with ``to_dict()``
    before being yielded.
    """
    scan_query = Search(index=index).using(es)
    yield from (record.to_dict() for record in scan_query.scan())
# Read the whitespace-separated table; the file has no header row.
df <- read.table('CH01PR20.txt', header = FALSE)
# The second column is used as x and the first column as y.
x <- df[[2]]
y <- df[[1]]
# Number of rows in the table.
n <- nrow(df)
# Scatter plot of y against x.
plot(x, y)
@lambdaofgod
lambdaofgod / load_image_from_url.py
Last active March 5, 2024 19:06
Load image from url with requests using headers (this circumvents failing skimage.io.imread)
import requests
from PIL import Image
from io import BytesIO
import numpy as np
# Request headers carrying a desktop-browser user-agent string.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/56.0.2924.87 Safari/537.36"
    ),
}
@lambdaofgod
lambdaofgod / download_images.py
Last active September 18, 2019 12:59
Download images given links in a CSV file
import fire
import tqdm
from concurrent.futures import ProcessPoolExecutor
import pandas as pd
import skimage.io
import os
from PIL import Image
import requests
from io import BytesIO
import mlutil.parallel
pip install gensim seaborn wordcloud rank_bm25
pip install git+https://github.com/lambdaofgod/mlutil
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.