source: pytorch/pytorch#1137 (comment)
Follow these steps in order to handle corrupted images:
Return None from __getitem__() if the image is corrupted:
def __getitem__(self, idx):
    try:
        img, label = load_img(idx)
        return img, label
    except Exception:
        return None  # skip the corrupted image
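Then drop the None samples in a custom collate_fn before batching. A minimal sketch of that companion step (not verbatim from the linked comment; the DataLoader usage and names are placeholders):
import torch
from torch.utils.data.dataloader import default_collate

def collate_skip_none(batch):
    # filter out samples for which __getitem__ returned None
    batch = [sample for sample in batch if sample is not None]
    return default_collate(batch)

# usage sketch:
# loader = torch.utils.data.DataLoader(dataset, batch_size=32, collate_fn=collate_skip_none)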
# create a file `create_rd37_env.sh` and add this snippet to it
# get conda path using `which conda`
/home/ec2-user/anaconda3/condabin/conda init bash
conda create -y --name rd37 python=3.7
conda activate rd37
conda install -y ipykernel
python -m ipykernel install --user --name rd37 --display-name "rd37"
conda install -y -c conda-forge cudatoolkit=11.2 cudnn=8.1.0
# https://github.com/MaartenGr/Concept
# !pip install concept umap-learn matplotlib
import glob
import hdbscan
import umap
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from concept import ConceptModel
imgs_folder_path = './imgs'
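A minimal sketch of fitting the model on that folder, assuming it holds .jpg images (the folder contents and extension are assumptions):
img_names = glob.glob(f"{imgs_folder_path}/*.jpg")
concept_model = ConceptModel()
concepts = concept_model.fit_transform(img_names)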
import boto3
import json

s3_bucket = 'production-db'
s3 = boto3.client('s3')

def get_json_from_s3(key: str, bucket=s3_bucket):
    """
    Retrieves the json file containing responses from s3. Returns a dict.
    """
    # fetch the object and parse its body as JSON
    response = s3.get_object(Bucket=bucket, Key=key)
    return json.loads(response['Body'].read().decode('utf-8'))
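Usage sketch (the key below is a made-up example, not a real object in the bucket):
responses = get_json_from_s3('responses/2021-01-01.json')
print(type(responses))  # dict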
import sys
import eventlet
# note: this urllib import path is Python 3 only (it does not exist in Python 2)
from eventlet.green.urllib.request import urlopen

file = sys.argv[1]  # file containing a list of urls, one per line
with open(file, 'r') as f:
    urls = [x.rstrip() for x in f.readlines()]
# duplicate the url list three times (8x as many urls)
urls = urls + urls
urls = urls + urls
urls = urls + urls
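The snippet stops before the actual downloads. A minimal sketch of fetching the urls concurrently with an eventlet GreenPool (the pool size and the print are assumptions):
pool = eventlet.GreenPool(100)

def fetch(url):
    return url, urlopen(url).read()

for url, body in pool.imap(fetch, urls):
    print(url, len(body))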
# https://stackoverflow.com/a/63021289/5554394
from subprocess import run
from time import sleep

# Path and name to the script you are trying to start
file_path = "test.py"
restart_timer = 2

def start_script():
    try:
        # run the script; check=True raises if it exits with a non-zero code
        run(["python", file_path], check=True)
    except Exception:
        handle_crash()
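The rest of the cited answer waits and then restarts the script; a sketch of that restart loop:
def handle_crash():
    sleep(restart_timer)  # wait before restarting the crashed script
    start_script()

start_script()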
import nltk
nltk.download('punkt')  # needed by nltk.word_tokenize
nltk.download('averaged_perceptron_tagger')

lines = 'lines is some string of words'

def filter_nouns_only(text):
    tokenized = nltk.word_tokenize(text)
    nouns = [word for (word, pos) in nltk.pos_tag(tokenized) if pos[:2] == 'NN']
    return " ".join(nouns)

print(filter_nouns_only(lines))  # keep only the nouns from `lines`
# pip install git+https://github.com/ScalaConsultants/Aspect-Based-Sentiment-Analysis
import aspect_based_sentiment_analysis as absa

nlp = absa.load()
text = ("We are great fans of Slack, but we wish the subscriptions "
        "were more accessible to small startups.")

slack, price = nlp(text, aspects=['slack', 'price'])
print(slack.sentiment, slack.sentiment.value)
print(price.sentiment, price.sentiment.value)
from nltk.stem.porter import PorterStemmer
import re

def extract_sentence_that_contain_keyword(keyword, text):
    stemmer = PorterStemmer()
    stemmed_keyword = stemmer.stem(keyword)
    # split the text into sentences on ., ! or ? (simple heuristic)
    sentences = re.split(r'(?<=[.!?])\s+', text)
    # keep sentences whose stemmed words contain the stemmed keyword
    return [s for s in sentences
            if stemmed_keyword in (stemmer.stem(w.strip('.,!?;:')) for w in s.split())]
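A quick usage sketch (the sample text is made up):
sample = "The model trains quickly. Results look good. Training data was small."
print(extract_sentence_that_contain_keyword("train", sample))
# expected to keep the first and last sentences ('trains'/'Training' stem to 'train')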
import re

# as per recommendation from @freylis, compile once only
CLEANR = re.compile('<.*?>')
# Some HTML can also contain entities that are not enclosed in brackets,
# such as '&nbsp;'. In that case, use this pattern instead:
# CLEANR = re.compile('<.*?>|&([a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});')

def cleanhtml(raw_html):
    cleantext = re.sub(CLEANR, '', raw_html)
    return cleantext
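Usage sketch with a made-up HTML fragment:
print(cleanhtml('<p>Hello <b>world</b></p>'))  # -> Hello world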