Skip to content

Instantly share code, notes, and snippets.

View j450h1's full-sized avatar

Jas Sohi j450h1

  • Microsoft
  • Vancouver, BC
View GitHub Profile
# Count the rows in each churn group, then attach every group's share of the
# overall count as a 'percent' column.
# NOTE(review): `sum` and `round` are presumably the pyspark.sql.functions
# versions (not the Python builtins) -- confirm against the file's imports.
user_count = (
    user_df.groupby('churn')
    .count()
    # An empty partitionBy() defines a window with no partition columns.
    .withColumn('percent', col('count') / sum('count').over(Window.partitionBy()))
)
# Scale the fraction to a percentage and keep two decimal places.
user_count = user_count.withColumn("percent", round(user_count["percent"] * 100, 2))
# Print the groups ordered by percentage, descending.
user_count.orderBy('percent', ascending=False).show()
+-----+-----+-------+
|churn|count|percent|
+-----+-----+-------+
| 0| 173| 76.89|
+------+------------------+------------------+--------------+--------------+------------------+
|gender|subscription_level|auth_logged_in_cnt|auth_guest_cnt|status_404_cnt|page_next_song_cnt|
+------+------------------+------------------+--------------+--------------+------------------+
| F| free| 11| 4| 6| 9|
+------+------------------+------------------+--------------+--------------+------------------+
# Compute the F1 metric for `predictions`, reading the true labels from the
# "indexedLabel" column.
# NOTE(review): the `rf_` prefix suggests these are random-forest predictions;
# confirm against the code that builds `predictions`.
rf_f1 = MulticlassClassificationEvaluator(
    labelCol="indexedLabel",
    metricName='f1',
).evaluate(predictions)
print('F1 Score', rf_f1)
F1 Score 0.6919632934386234
# Compute the F1 metric for `predictions_gbt`, reading the true labels from
# the "indexedLabel" column.
# NOTE(review): the `gbt` suffix suggests a gradient-boosted-trees model;
# confirm against the code that builds `predictions_gbt`.
f1_gbt = MulticlassClassificationEvaluator(
    labelCol="indexedLabel",
    metricName='f1',
).evaluate(predictions_gbt)
print('F1', f1_gbt)
F1 0.7115836101882613
# Evaluate the model on the test set.
# The evaluator is built with its default params; the dict passed as the
# second argument to evaluate() is a param map that sets metricName to 'f1'
# for this call. (The original line was missing the comma between the
# dataset and the param map, which is a syntax error.)
evaluator = MulticlassClassificationEvaluator()
print('F1-Score ', evaluator.evaluate(prediction, {evaluator.metricName: 'f1'}))
F1-Score 0.6736596736596737