duttashi · May 25, 2021 07:13
diff --git a/cleaning_text_data_using_regex.py b/cleaning_text_data_using_regex.py
 # Suppose the text data is loaded in a dataframe called df, with the raw
 # strings in its `text` column. Each step below rewrites that column in place
 # using regular expressions (assumes `re` is imported earlier in the script).
 #
 # Order matters: handles, hashtags and URLs are removed first because the
 # special-character step deletes the '@', '#', ':' and '/' characters that
 # those patterns depend on.

 # Remove twitter handles: '@' followed by a run of non-whitespace characters.
 # Raw string so that '\S' reaches the regex engine instead of being treated
 # as an (invalid) string escape.
 df.text = df.text.apply(lambda x: re.sub(r'@\S+', '', x))

 # Remove hashtags: '#' not directly preceded by a word character.
 df.text = df.text.apply(lambda x: re.sub(r'\B#\S+', '', x))

 # Remove URLs: 'http' followed by a run of non-whitespace characters.
 df.text = df.text.apply(lambda x: re.sub(r'http\S+', '', x))

 # Remove all the special characters: keep only the runs of word characters
 # and join them back together with single spaces.
 df.text = df.text.apply(lambda x: ' '.join(re.findall(r'\w+', x)))

 # Substitute multiple whitespace characters with a single space. After the
 # join above this no longer changes anything; it is kept as a safety net in
 # case the previous step is removed or reordered. No flags are needed since
 # '\s' has no letter case.
 df.text = df.text.apply(lambda x: re.sub(r'\s+', ' ', x))
	# Suppose the text data is loaded in a dataframe called df, with the raw
	# strings in its `text` column. Each step below rewrites that column in place
	# using regular expressions (assumes `re` is imported earlier in the script).
	#
	# Order matters: handles, hashtags and URLs are removed first because the
	# special-character step deletes the '@', '#', ':' and '/' characters that
	# those patterns depend on.

	# Remove twitter handles: '@' followed by a run of non-whitespace characters.
	# Raw string so that '\S' reaches the regex engine instead of being treated
	# as an (invalid) string escape.
	df.text = df.text.apply(lambda x: re.sub(r'@\S+', '', x))

	# Remove hashtags: '#' not directly preceded by a word character.
	df.text = df.text.apply(lambda x: re.sub(r'\B#\S+', '', x))

	# Remove URLs: 'http' followed by a run of non-whitespace characters.
	df.text = df.text.apply(lambda x: re.sub(r'http\S+', '', x))

	# Remove all the special characters: keep only the runs of word characters
	# and join them back together with single spaces.
	df.text = df.text.apply(lambda x: ' '.join(re.findall(r'\w+', x)))

	# Substitute multiple whitespace characters with a single space. After the
	# join above this no longer changes anything; it is kept as a safety net in
	# case the previous step is removed or reordered. No flags are needed since
	# '\s' has no letter case.
	df.text = df.text.apply(lambda x: re.sub(r'\s+', ' ', x))
No results found