source: pytorch/pytorch#1137 (comment)
Follow these steps in order to handle corrupted images:
1. Return None from `__getitem__()` if the image is corrupted.
2. Filter the None samples out when batching, via a custom `collate_fn` (see the sketch after the snippet).

def __getitem__(self, idx):
    try:
        img, label = load_img(idx)
    except Exception:
        return None  # corrupted or unreadable image
    return img, label
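The second step, dropping those None samples when batching, is sketched below under the standard DataLoader API; `collate_skip_none` is a name chosen here, and `default_collate` is PyTorch's stock collate function:

from torch.utils.data import DataLoader
from torch.utils.data.dataloader import default_collate

def collate_skip_none(batch):
    # drop samples whose __getitem__ returned None, then collate the rest
    batch = [sample for sample in batch if sample is not None]
    return default_collate(batch)

# loader = DataLoader(dataset, batch_size=32, collate_fn=collate_skip_none)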
# create a file `create_rd37_env.sh` and add this snippet to it
# get the conda path using `which conda`
/home/ec2-user/anaconda3/condabin/conda init bash
conda create -y --name rd37 python=3.7
conda activate rd37
conda install -y ipykernel
python -m ipykernel install --user --name rd37 --display-name "rd37"
conda install -y -c conda-forge cudatoolkit=11.2 cudnn=8.1.0
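Run it with `source create_rd37_env.sh` (rather than executing it) so that `conda activate` takes effect in the current shell; you may also need to restart the shell after `conda init bash` before activation works.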
# https://github.com/MaartenGr/Concept
# !pip install concept umap-learn matplotlib
import glob
import hdbscan
import umap
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from concept import ConceptModel

imgs_folder_path = './imgs'
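A minimal continuation, following the library's README: collect the image paths and fit the model (the `*.jpg` pattern is an assumption about the folder contents):

img_names = glob.glob(imgs_folder_path + '/*.jpg')  # assumes jpg images
concept_model = ConceptModel()
concepts = concept_model.fit_transform(img_names)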
import boto3
import json

s3_bucket = 'production-db'
s3 = boto3.client('s3')

def get_json_from_s3(key: str, bucket=s3_bucket):
    """Retrieve the JSON file containing responses from S3 and return it as a dict."""
    # standard boto3 pattern: fetch the object, read its body, parse the JSON
    response = s3.get_object(Bucket=bucket, Key=key)
    return json.loads(response['Body'].read())
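A usage sketch; the object key is hypothetical:

responses = get_json_from_s3('responses/latest.json')  # hypothetical key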
import sys
import eventlet
# note: this urllib import doesn't work in Python 2
from eventlet.green.urllib.request import urlopen

file = sys.argv[1]  # path to a file with one url per line
with open(file, 'r') as f:
    urls = [x.rstrip() for x in f.readlines()]
urls = urls * 8  # enlarge the workload; same as doubling the list three times

def fetch(url):
    return urlopen(url).read()

# fetch all urls concurrently on green threads
pool = eventlet.GreenPool()
for body in pool.imap(fetch, urls):
    print(len(body))
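Assuming the snippet is saved as `fetch_urls.py` (a name chosen here), run it as `python fetch_urls.py urls.txt`, where `urls.txt` holds one url per line.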
# https://stackoverflow.com/a/63021289/5554394
from subprocess import run
from time import sleep

file_path = "test.py"  # path and name of the script you are trying to start
restart_timer = 2      # seconds to wait before restarting

def start_script():
    try:
        run(["python", file_path], check=True)  # check=True raises on a non-zero exit
    except Exception:
        sleep(restart_timer)  # the script crashed; restart it after a pause
        start_script()
start_script()
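One caveat: each crash-and-restart adds a stack frame, so a long-running flaky script will eventually hit Python's recursion limit. A loop-based sketch, reusing the same names, avoids that:

def start_script_loop():
    while True:
        try:
            run(["python", file_path], check=True)
            break  # clean exit; stop restarting
        except Exception:
            sleep(restart_timer)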
import nltk
nltk.download('punkt')                        # tokenizer model used by word_tokenize
nltk.download('averaged_perceptron_tagger')   # POS tagger model used by pos_tag

lines = 'lines is some string of words'

def filter_nouns_only(text):
    tokenized = nltk.word_tokenize(text)
    nouns = [word for (word, pos) in nltk.pos_tag(tokenized) if pos[:2] == 'NN']
    return " ".join(nouns)
# pip install git+https://github.com/ScalaConsultants/Aspect-Based-Sentiment-Analysis
import aspect_based_sentiment_analysis as absa

nlp = absa.load()
text = ("We are great fans of Slack, but we wish the subscriptions "
        "were more accessible to small startups.")
slack, price = nlp(text, aspects=['slack', 'price'])
print(slack.sentiment, slack.sentiment.value)
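The same inspection for the second aspect (the project README's version of this example expects positive sentiment for 'slack' and negative for 'price'):

print(price.sentiment, price.sentiment.value)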
import re
from nltk.stem.porter import PorterStemmer

def extract_sentence_that_contain_keyword(keyword, text):
    stemmer = PorterStemmer()
    stemmed_keyword = stemmer.stem(keyword)
    # compare stems so that e.g. 'Running' matches the keyword 'run'
    sentences = re.split(r'(?<=[.!?])\s+', text)
    return [s for s in sentences
            if stemmed_keyword in (stemmer.stem(w.strip('.,!?')) for w in s.split())]
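A usage sketch with a made-up passage; 'Running' matches the keyword because both share the stem 'run':

text = "We run tests nightly. The weather is nice. Running them takes an hour."
print(extract_sentence_that_contain_keyword('run', text))
# expected: ['We run tests nightly.', 'Running them takes an hour.']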
import re

# as per recommendation from @freylis, compile once only (at module level)
CLEANR = re.compile('<.*?>')
# some HTML also contains entities that are not enclosed in brackets, such as '&nbsp;';
# in that case, use this pattern instead:
# CLEANR = re.compile('<.*?>|&([a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});')

def cleanhtml(raw_html):
    cleantext = re.sub(CLEANR, '', raw_html)
    return cleantext
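A quick usage sketch:

print(cleanhtml('<p>Hello, <b>world</b>!</p>'))  # -> 'Hello, world!'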