Skip to content

Instantly share code, notes, and snippets.

@MLWhiz
Created January 18, 2019 05:56
Show Gist options
  • Save MLWhiz/aeb2cfdd08e687ea7364f7c49a0f14c1 to your computer and use it in GitHub Desktop.
Save MLWhiz/aeb2cfdd08e687ea7364f7c49a0f14c1 to your computer and use it in GitHub Desktop.
def _apply_with_progress(obj, func, **kwargs):
    """Apply *func* through tqdm's ``progress_apply`` if present, else ``apply``.

    ``progress_apply`` is only attached to pandas objects after
    ``tqdm.pandas()`` has been called; falling back to ``apply`` keeps the
    feature code usable when tqdm has not been registered.
    """
    runner = getattr(obj, "progress_apply", obj.apply)
    return runner(func, **kwargs)


def add_features(df):
    """Add simple text-statistics columns derived from ``df['question_text']``.

    The frame is modified in place and also returned.  ``question_text`` is
    first coerced to ``str``; the following columns are then added:

    * ``lower_question_text`` -- lower-cased text.
    * ``total_length`` -- number of characters.
    * ``capitals`` -- number of characters for which ``str.isupper()`` holds.
    * ``caps_vs_length`` -- ``capitals / total_length``; ``0.0`` for empty
      text (previously this raised ``ZeroDivisionError``).
    * ``num_words`` -- number of runs of non-whitespace characters.
    * ``num_unique_words`` -- number of distinct whitespace-separated tokens.
    * ``words_vs_unique`` -- ``num_unique_words / num_words``; NaN when the
      text contains no words (pandas 0/0 division).

    Args:
        df: DataFrame with a ``question_text`` column.

    Returns:
        The same DataFrame object, with the columns above added/overwritten.
    """
    text = _apply_with_progress(df['question_text'], str)
    df['question_text'] = text
    df['lower_question_text'] = text.str.lower()
    df['total_length'] = _apply_with_progress(text, len)
    df['capitals'] = _apply_with_progress(
        text, lambda comment: sum(1 for c in comment if c.isupper())
    )
    # Vectorised division instead of a row-wise apply: an empty question gives
    # 0/0 -> NaN here (no exception), which is then mapped to 0.0.  A non-zero
    # numerator with a zero length cannot occur since capitals <= total_length.
    df['caps_vs_length'] = (df['capitals'] / df['total_length']).fillna(0.0)
    # Raw string: '\S' in a normal literal is an invalid escape sequence.
    df['num_words'] = text.str.count(r'\S+')
    df['num_unique_words'] = _apply_with_progress(
        text, lambda comment: len(set(comment.split()))
    )
    df['words_vs_unique'] = df['num_unique_words'] / df['num_words']
    return df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment