This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
grep -Hrn "import re" . --include \*.ipynb |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from psycopg2.extras import execute_batch | |
def commit(self,vals)->None: | |
conn = self.conn | |
cursor = conn.cursor() | |
sql = """ | |
update "movies" | |
set imdb_vote_count = %(votes)s, | |
imdb_rating = %(rating)s, | |
"updatedAt" = now() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create logger | |
logger = logging.getLogger() | |
logger.setLevel(logging.DEBUG) | |
# Create STDERR handler | |
handler = logging.StreamHandler(sys.stderr) | |
# ch.setLevel(logging.DEBUG) | |
# Create formatter and add it to the handler | |
formatter = logging.Formatter('%(name)s - %(levelname)s - %(message)s') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow as tf | |
strategy = tf.distribute.MirroredStrategy() | |
print('Number of devices: {}'.format(strategy.num_replicas_in_sync)) # it prints 2 .. correct | |
with strategy.scope(): | |
embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder-cmlm/multilingual-base/1") | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import transformers | |
import torch | |
from collections import defaultdict | |
from transformers import pipeline | |
text_batch = """Natural language processing (NLP) is an area of computer science and artificial intelligence concerned with the interactions between computers and human (natural) languages, in particular how to program computers to process and analyze large amounts of natural language data. It is the branch of machine learning which is about analyzing any text and handling predictive analysis.""" | |
kw_list = ['computer', 'machine learning', 'knowledge', 'language'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import psycopg2 | |
import logging | |
def execute_sql(sql, conn_str): | |
conn = psycopg2.connect(conn_str) | |
cursor = conn.cursor() | |
try: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#source: https://stackoverflow.com/a/42012264 | |
def melt_list_column(df, lst_col):
    """Expand a list-valued column so that every list element gets its own row.

    Each row of ``df`` is repeated once per element of its ``lst_col`` list,
    and ``lst_col`` is replaced by the flattened elements. Rows whose list is
    empty contribute no rows. The result has a fresh ``RangeIndex`` and the
    same column order as ``df``.

    Args:
        df: DataFrame whose ``lst_col`` cells hold list-like values.
        lst_col: Name of the list-valued column to expand.

    Returns:
        A new DataFrame with one row per list element.
    """
    if len(df) == 0:
        # np.concatenate() raises ValueError when it is given no arrays, so a
        # zero-row frame is handed back as an (equally empty) copy instead.
        return df.iloc[:0].reset_index(drop=True)

    # Number of elements in each row's list; computed once and reused as the
    # repeat count for every other column.
    lengths = df[lst_col].str.len()
    repeated = {
        col: np.repeat(df[col].values, lengths)
        for col in df.columns.difference([lst_col])
    }
    flattened = np.concatenate(df[lst_col].values)
    # Index.difference() returns the remaining columns sorted, so reselect
    # with the original column list to restore the caller's ordering.
    return pd.DataFrame(repeated).assign(**{lst_col: flattened})[df.columns.tolist()]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#source: stackoverflow answer, can't find the link anymore | |
import numpy as np | |
a = np.array([[[10,2], | |
[5,3], | |
[4,4]], | |
[[7,6], | |
[4,2], |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#source: https://www.maartengrootendorst.com/blog/concept/ | |
import random | |
import nltk | |
# Download the WordNet corpus data through NLTK's downloader so the corpus
# reader imported on the next line has something to load.
nltk.download("wordnet") | |
from nltk.corpus import wordnet as wn | |
# Gather the lemma names of every noun synset ('n' selects nouns), skipping
# names that contain an underscore (presumably WordNet's multi-word entries --
# confirm). There is no de-duplication: a word that belongs to several synsets
# is listed once per synset.
all_nouns = [word for synset in wn.all_synsets('n') for word in synset.lemma_names() | |
if "_" not in word] | |
# Draw 50,000 entries without replacement (by position, so duplicate words in
# all_nouns can still both be picked). random.sample raises ValueError if the
# list holds fewer than 50,000 entries.
selected_nouns = random.sample(all_nouns, 50_000) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scipy.sparse import coo_matrix | |
def create_sparse_matrix(data, iid2cf_idx, uid2cf_idx, item_id_column, user_id_column): | |
max_item_idx = max(iid2cf_idx.values()) | |
max_user_idx = max(uid2cf_idx.values()) | |