Skip to content

Instantly share code, notes, and snippets.

amrakm /
Last active September 21, 2022 09:13
create conda env from shell script
# Create a shell script file (e.g. `create_env.sh`) and add this snippet to it.
# Get the conda path using `which conda`.
/home/ec2-user/anaconda3/condabin/conda init bash
# `conda init` only edits ~/.bashrc, which is not re-read by the shell that is
# already running this script. Load the conda shell hook into the current
# shell so that `conda activate` below works.
eval "$(/home/ec2-user/anaconda3/condabin/conda shell.bash hook)"
# Create the Python 3.7 environment and switch to it.
conda create -y --name rd37 python=3.7
conda activate rd37
# Register the environment as a Jupyter kernel named "rd37".
conda install -y ipykernel
python -m ipykernel install --user --name rd37 --display-name "rd37"
# GPU runtime libraries, installed from conda-forge.
conda install -y -c conda-forge cudatoolkit=11.2 cudnn=8.1.0
amrakm /
Created September 7, 2022 20:50

source: pytorch/pytorch#1137 (comment)

Follow these steps in order to handle corrupted images:

Return None from `__getitem__()` if the image is corrupted:

def __getitem__(self, idx):
 img, label = load_img(idx)
amrakm /
Last active September 2, 2022 16:10
cluster images using CLIP embeddings
# !pip install concept umap-learn matplotlib
import glob
import hdbscan
import umap
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from concept import ConceptModel
imgs_folder_path = './imgs'
amrakm /
Created August 24, 2022 15:13
load json from s3
import boto3
import json
s3_bucket = 'production-db'
s3 = boto3.client('s3')
def get_json_from_s3(key: str, bucket=s3_bucket):
Retrieves the json file containing responses from s3. returns a dict
amrakm /
Created July 1, 2022 13:16
download url parallel
import sys

import eventlet
# NOTE: this green urllib import does not work in Python 2.
from eventlet.green.urllib.request import urlopen

file = sys.argv[1]  # path to a text file listing URLs, one per line
with open(file, 'r') as f:
    # Strip trailing whitespace (including the newline) from every line.
    urls = [line.rstrip() for line in f]
# Repeat the list 8 times; this equals doubling it three times in a row.
# NOTE(review): presumably done to enlarge the workload -- confirm intent.
urls = urls * 8
amrakm /
Last active June 20, 2022 13:58
restart python script if it crashes
from subprocess import run
from time import sleep
# Path and name to the script you are trying to start
file_path = ""
restart_timer = 2
def start_script():
amrakm /
Last active May 26, 2022 11:19
filter nouns only
import nltk

# Fetch the POS-tagger model that nltk.pos_tag() relies on.
nltk.download('averaged_perceptron_tagger')
# NOTE(review): nltk.word_tokenize() also needs tokenizer data (e.g. 'punkt');
# confirm it is available in the target environment.

lines = 'lines is some string of words'


def filter_nouns_only(text):
    """Return only the nouns found in *text*, joined by single spaces.

    The text is tokenized with ``nltk.word_tokenize`` and tagged with
    ``nltk.pos_tag``; a token is kept when its tag starts with ``'NN'``.

    Args:
        text: The input string to filter.

    Returns:
        A string of the kept tokens in their original order, separated by
        single spaces (empty when no token is tagged as a noun).
    """
    tokens = nltk.word_tokenize(text)
    nouns = [word for word, tag in nltk.pos_tag(tokens) if tag.startswith('NN')]
    return " ".join(nouns)
amrakm /
Last active May 26, 2022 10:48
aspect based sentiment analysis
# pip install aspect-based-sentiment-analysis
import aspect_based_sentiment_analysis as absa
nlp = absa.load()
text = ("We are great fans of Slack, but we wish the subscriptions "
"were more accessible to small startups.")
slack, price = nlp(text, aspects=['slack', 'price'])
print(slack.sentiment, slack.sentiment.value)
amrakm /
Created May 26, 2022 10:16
find sentences that contains a keyword - stemmed string match
from nltk.stem.porter import PorterStemmer
import re
def extract_sentence_that_contain_keyword(keyword, text):
stemmer = PorterStemmer()
stemmed_keyword = stemmer.stem(keyword)
stemmed_text = ' '.join([stemmer.stem(x) for x in text.split()])
amrakm /
Created April 29, 2022 11:34
clean text from html tags
import re
def cleanhtml(raw_html):
# Some HTML text also contains entities that are not enclosed in angle brackets, such as '&nbsp;'. If that is the case, you might want to write the regex as
CLEANR = re.compile('<.*?>|&([a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});')
# as per recommendation from @freylis, compile once only
CLEANR = re.compile('<.*?>')
cleantext = re.sub(CLEANR, '', raw_html)