print("Hello World")
def get_non_empty_lines(lines):
    """
    Returns the non-empty lines from a list of lines.
    """
    clean_lines = []
    for line in lines:
        str_line = line.strip()
        if str_line:
            clean_lines.append(str_line)
    return clean_lines
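
A quick usage sketch (the input lines below are made up purely for illustration):

lines = ['keep me\n', '   \n', '', 'and me']
get_non_empty_lines(lines)  # -> ['keep me', 'and me']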
# punctuation and special characters to strip from the text with cuDF
filters = ['!', '"', '#', '$', '%', '&', '(', ')', '*', '+', '-', '.', '/', '\\', ':', ';', '<', '=', '>',
           '?', '@', '[', ']', '^', '_', '`', '{', '|', '}', '~', '\t', '\n', "'", ',', '—']
text_col_sample = df.head(5)
text_col_sample['text'].to_pandas()

# replace every filter character with a space, e.g. "Hello, World!" -> "Hello  World "
text_col_sample['text_clean'] = text_col_sample['text'].str.replace_multi(filters, ' ', regex=False)
# lower-case the cleaned text
text_col_sample['text_clean'] = text_col_sample['text_clean'].str.lower()
text_col_sample['text_clean'].to_pandas()
import nltk
import nvstrings, nvtext
# NLTK English stop words, copied to device memory for nvtext
STOPWORDS = nltk.corpus.stopwords.words('english')
STOPWORDS = nvstrings.to_device(STOPWORDS)
# replace each stop-word token with a space
text_col_sample['text_clean'] = nvtext.replace_tokens(text_col_sample['text_clean'].data, STOPWORDS, ' ')
text_col_sample['text_clean'].to_pandas()
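
For intuition, a small illustrative call on a throwaway string (the sentence is made up; 'is', 'the', and 'of' are in NLTK's English stop-word list, and the output spacing shown is approximate since the leftover whitespace is cleaned up in the next step):

sample = nvstrings.to_device(['gpu is the future of data science'])
nvtext.replace_tokens(sample, STOPWORDS, ' ')  # roughly: 'gpu     future   data science'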
# collapse the runs of whitespace left behind by the replacements, then trim the ends
text_col_sample['text_clean'] = text_col_sample['text_clean'].str.replace(r"\s+", ' ', regex=True)
text_col_sample['text_clean'] = text_col_sample['text_clean'].str.strip(' ')
text_col_sample['text_clean'].to_pandas()
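
As a quick sanity check of the whitespace cleanup on its own (a throwaway Series, assuming the same cuDF string API used above):

import cudf
s = cudf.Series(['gpu     future   data science '])
s.str.replace(r'\s+', ' ', regex=True).str.strip(' ').to_pandas()  # -> 'gpu future data science'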
# the same steps, rolled into a single reusable helper
STOPWORDS = nltk.corpus.stopwords.words('english')
filters = ['!', '"', '#', '$', '%', '&', '(', ')', '*', '+', '-', '.', '/', '\\', ':', ';', '<', '=', '>',
           '?', '@', '[', ']', '^', '_', '`', '{', '|', '}', '~', '\t', '\n', "'", ',', '—']
def preprocess_text(input_strs, filters=None, stopwords=STOPWORDS):
    """
    * filter punctuation
    * to_lower
    * remove stop words (from nltk corpus)