MLWhiz · January 18, 2019 05:56
diff --git a/extra_feats.py b/extra_feats.py
 def _apply_with_progress(series, func):
     """Apply ``func`` to every element of ``series``.

     Uses ``progress_apply`` (added to pandas objects by ``tqdm.pandas()``)
     when it is available so a progress bar is shown, and falls back to the
     plain ``apply`` otherwise instead of failing with ``AttributeError``.
     """
     apply = getattr(series, "progress_apply", series.apply)
     return apply(func)


 def add_features(df):
     """Add simple text statistics for the ``question_text`` column, in place.

     ``question_text`` is first coerced to ``str``; the following columns are
     then added to ``df``:

     * ``lower_question_text`` -- lower-cased question text
     * ``total_length``        -- number of characters
     * ``capitals``            -- number of upper-case characters
     * ``caps_vs_length``      -- ``capitals / total_length``
     * ``num_words``           -- number of whitespace-separated tokens
     * ``num_unique_words``    -- number of distinct tokens
     * ``words_vs_unique``     -- ``num_unique_words / num_words``

     Both ratio columns are NaN for an empty question (0 / 0).

     Args:
         df: DataFrame with a ``question_text`` column.

     Returns:
         The same ``df`` object, with the columns above added.
     """
     df['question_text'] = _apply_with_progress(df['question_text'], str)
     text = df['question_text']

     df['lower_question_text'] = text.str.lower()
     df['total_length'] = _apply_with_progress(text, len)
     df['capitals'] = _apply_with_progress(
         text, lambda comment: sum(1 for c in comment if c.isupper())
     )
     # Column-wise division gives NaN for empty questions, matching
     # words_vs_unique below; dividing row by row with float() raised
     # ZeroDivisionError on them.
     df['caps_vs_length'] = df['capitals'] / df['total_length']
     # Raw string: '\S' in a normal literal is an invalid escape sequence.
     df['num_words'] = text.str.count(r'\S+')
     df['num_unique_words'] = _apply_with_progress(
         text, lambda comment: len(set(comment.split()))
     )
     # Despite the column name, this is unique words divided by total words.
     df['words_vs_unique'] = df['num_unique_words'] / df['num_words']
     return df
	def _apply_with_progress(series, func):
		"""Apply ``func`` to every element of ``series``.

		Uses ``progress_apply`` (added to pandas objects by ``tqdm.pandas()``)
		when it is available so a progress bar is shown, and falls back to the
		plain ``apply`` otherwise instead of failing with ``AttributeError``.
		"""
		apply = getattr(series, "progress_apply", series.apply)
		return apply(func)


	def add_features(df):
		"""Add simple text statistics for the ``question_text`` column, in place.

		``question_text`` is first coerced to ``str``; the following columns are
		then added to ``df``:

		* ``lower_question_text`` -- lower-cased question text
		* ``total_length``        -- number of characters
		* ``capitals``            -- number of upper-case characters
		* ``caps_vs_length``      -- ``capitals / total_length``
		* ``num_words``           -- number of whitespace-separated tokens
		* ``num_unique_words``    -- number of distinct tokens
		* ``words_vs_unique``     -- ``num_unique_words / num_words``

		Both ratio columns are NaN for an empty question (0 / 0).

		Args:
			df: DataFrame with a ``question_text`` column.

		Returns:
			The same ``df`` object, with the columns above added.
		"""
		df['question_text'] = _apply_with_progress(df['question_text'], str)
		text = df['question_text']

		df['lower_question_text'] = text.str.lower()
		df['total_length'] = _apply_with_progress(text, len)
		df['capitals'] = _apply_with_progress(
			text, lambda comment: sum(1 for c in comment if c.isupper())
		)
		# Column-wise division gives NaN for empty questions, matching
		# words_vs_unique below; dividing row by row with float() raised
		# ZeroDivisionError on them.
		df['caps_vs_length'] = df['capitals'] / df['total_length']
		# Raw string: '\S' in a normal literal is an invalid escape sequence.
		df['num_words'] = text.str.count(r'\S+')
		df['num_unique_words'] = _apply_with_progress(
			text, lambda comment: len(set(comment.split()))
		)
		# Despite the column name, this is unique words divided by total words.
		df['words_vs_unique'] = df['num_unique_words'] / df['num_words']
		return df