Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save abhijeet-talaulikar/1829c82386e526f84e7641b3b39cc241 to your computer and use it in GitHub Desktop.
Save abhijeet-talaulikar/1829c82386e526f84e7641b3b39cc241 to your computer and use it in GitHub Desktop.
import pandas as pd
import numpy as np
import pandas as pd
import swifter
import cleantext
# Read only the "headline" column from the partner-headlines CSV.
data = pd.read_csv("raw_partner_headlines.csv", usecols=["headline"])

# Keyword arguments forwarded unchanged to cleantext.clean_words for each
# headline. The `reg` pattern matches words of one or two word-characters
# together with any non-word characters directly before them; it is paired
# with an empty `reg_replace`.
# NOTE(review): presumably cleantext substitutes `reg` matches with
# `reg_replace`, i.e. drops very short tokens -- confirm against cleantext docs.
_CLEAN_WORDS_OPTIONS = dict(
    clean_all=False,
    extra_spaces=True,
    stopwords=True,
    lowercase=True,
    numbers=True,
    punct=True,
    reg=r'\W*\b\w{1,2}\b',
    reg_replace='',
    stp_lang='english',
)


def _clean_headline(headline):
    """Return the words cleantext keeps from *headline*, joined by spaces."""
    kept_words = cleantext.clean_words(headline, **_CLEAN_WORDS_OPTIONS)
    return ' '.join(kept_words)


# Apply the cleaner to every headline through swifter's accessor and store
# the result in a new "headline_clean" column next to the raw text.
data['headline_clean'] = data['headline'].swifter.apply(_clean_headline)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment