Skip to content

Instantly share code, notes, and snippets.

@amrakm
amrakm / create_rd37_env.sh
Last active September 21, 2022 09:13
create conda env from shell script
# Create a file `create_rd37_env.sh` and add this snippet to it.
# Find your own conda path with `which conda` and adjust CONDA_BIN to match.
CONDA_BIN=/home/ec2-user/anaconda3/condabin/conda

# Register conda in the bash start-up file so future shells can use it.
"$CONDA_BIN" init bash

# `conda init` only edits the start-up file; it does not configure the shell
# that is running this script. Load conda's shell hook here, otherwise
# `conda activate` below fails and the installs miss the new environment.
eval "$("$CONDA_BIN" shell.bash hook)"

# Create the Python 3.7 environment and switch to it.
conda create -y --name rd37 python=3.7
conda activate rd37

# Install ipykernel and register the environment as a kernel named "rd37".
conda install -y ipykernel
python -m ipykernel install --user --name rd37 --display-name "rd37"

# CUDA toolkit 11.2 and cuDNN 8.1.0 from the conda-forge channel.
conda install -y -c conda-forge cudatoolkit=11.2 cudnn=8.1.0
@amrakm
amrakm / torch_data_loader_w_corrupted_imgs.md
Created September 7, 2022 20:50
torch_data_loader_w_corrupted_imgs.md

source: pytorch/pytorch#1137 (comment)

Follow these steps in order to handle corrupted images:

Return `None` from `__getitem__()` if the image is corrupted

def __getitem__(self, idx):
    try:
 img, label = load_img(idx)
@amrakm
amrakm / cluster_images.py
Last active September 2, 2022 16:10
cluster images using CLIP embeddings
# https://github.com/MaartenGr/Concept
# !pip install concept umap-learn matplotlib
import glob
import hdbscan
import umap
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from concept import ConceptModel
# Folder that holds the images, given relative to the current working directory.
# NOTE(review): presumably scanned with `glob` further down (not visible here) — confirm.
imgs_folder_path = './imgs'
@amrakm
amrakm / load_json_from_s3.py
Created August 24, 2022 15:13
load json from s3
import boto3
import json
# Bucket name used as the default `bucket` argument of get_json_from_s3.
s3_bucket = 'production-db'
# Module-level S3 client, created once at import time.
s3 = boto3.client('s3')
def get_json_from_s3(key: str, bucket=s3_bucket):
"""
Retrieves the json file containing responses from s3. returns a dict
@amrakm
amrakm / download_url_parallel.py
Created July 1, 2022 13:16
download url parallel
import eventlet,sys
# note: this urllib import doesn't work in Python2
from eventlet.green.urllib.request import urlopen
# First command-line argument: path of a text file listing the URLs, one per line.
file = sys.argv[1]

with open(file, 'r') as f:
    # Drop trailing whitespace (including the newline) from every line.
    urls = [line.rstrip() for line in f]

# Repeat the list eight times over, equivalent to doubling it three times.
# NOTE(review): presumably done to create a larger workload — confirm.
urls = urls * 8
@amrakm
amrakm / restart_crashing_python_script.py
Last active June 20, 2022 13:58
restart python script if it crashes
# https://stackoverflow.com/a/63021289/5554394
from subprocess import run
from time import sleep
# Path (including the file name) of the script you are trying to start
file_path = "test.py"
# NOTE(review): presumably the delay before a restart, in seconds (`sleep` is imported above) — confirm against the rest of the script.
restart_timer = 2
def start_script():
@amrakm
amrakm / filter_nouns_only.py
Last active May 26, 2022 11:19
filter nouns only
import nltk
import nltk
# Fetch the NLTK resources that filter_nouns_only relies on:
#   - 'punkt': tokenizer models required by nltk.word_tokenize; without them
#     a fresh install raises LookupError on the first call.
#   - 'averaged_perceptron_tagger': model required by nltk.pos_tag.
# NOTE(review): recent NLTK releases may ask for 'punkt_tab' /
# 'averaged_perceptron_tagger_eng' instead — confirm against the installed version.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

# Sample input text.
lines = 'lines is some string of words'
def filter_nouns_only(text):
    """Return only the noun tokens of *text*, joined by single spaces.

    The text is tokenized with ``nltk.word_tokenize`` and tagged with
    ``nltk.pos_tag``. A token is kept when its tag begins with ``'NN'``.

    Args:
        text: The string to filter.

    Returns:
        A string of the kept tokens in their original order, separated by
        single spaces (empty when no token qualifies).
    """
    tagged_tokens = nltk.pos_tag(nltk.word_tokenize(text))
    return " ".join(
        token for token, tag in tagged_tokens if tag.startswith('NN')
    )
@amrakm
amrakm / absa.py
Last active May 26, 2022 10:48
aspect based sentiment analysis
# pip install git+https://github.com/ScalaConsultants/Aspect-Based-Sentiment-Analysis
import aspect_based_sentiment_analysis as absa
# Build the pipeline with absa.load()'s default settings.
nlp = absa.load()

# Sample sentence to analyse.
text = (
    "We are great fans of Slack, but we wish the subscriptions "
    "were more accessible to small startups."
)

# The call is unpacked into one result per requested aspect.
# NOTE(review): presumably results come back in the order of `aspects` — confirm.
slack, price = nlp(text, aspects=['slack', 'price'])

# Show the sentiment found for the 'slack' aspect and its underlying value.
slack_sentiment = slack.sentiment
print(slack_sentiment, slack_sentiment.value)
@amrakm
amrakm / extract_sentence_that_contain_keyword.py
Created May 26, 2022 10:16
find sentences that contain a keyword - stemmed string match
from nltk.stem.porter import PorterStemmer
import re
def extract_sentence_that_contain_keyword(keyword, text):
stemmer = PorterStemmer()
stemmed_keyword = stemmer.stem(keyword)
stemmed_text = ' '.join([stemmer.stem(x) for x in text.split()])
@amrakm
amrakm / clean_html.py
Created April 29, 2022 11:34
clean text from html tags
import re
def cleanhtml(raw_html):
# Some HTML text also contains entities that are not enclosed in angle brackets, such as '&nbsp;'. If that is the case, you might want to write the regex as
CLEANR = re.compile('<.*?>|&([a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});')
# as per recommendation from @freylis, compile once only
CLEANR = re.compile('<.*?>')
cleantext = re.sub(CLEANR, '', raw_html)