Skip to content

Instantly share code, notes, and snippets.

@audhiaprilliant
Created December 24, 2020 02:55
Show Gist options
  • Select an option

  • Save audhiaprilliant/e1864f5225991645be7afbd4056c9f3a to your computer and use it in GitHub Desktop.

Select an option

Save audhiaprilliant/e1864f5225991645be7afbd4056c9f3a to your computer and use it in GitHub Desktop.
How to choose the optimal threshold for imbalanced classification
# Import module for data manipulation
import pandas as pd
# Import module for linear algebra
import numpy as np
# Import module for data simulation
from sklearn.datasets import make_classification # Create a synthetic dataframe
from sklearn.linear_model import LogisticRegression # Classification model
from sklearn.model_selection import train_test_split # Split the dataframe
from sklearn.metrics import roc_curve # Calculate the ROC curve
from sklearn.metrics import precision_recall_curve # Calculate the Precision-Recall curve
from sklearn.metrics import f1_score # Calculate the F-score
# Import module for data visualization
from plotnine import *
import plotnine
# Build a highly imbalanced synthetic dataset: 10k samples, 2 informative
# features, ~99% negatives (weights=[0.99]) and no label noise (flip_y=0).
X, y = make_classification(
    n_samples=10000,
    n_features=2,
    n_redundant=0,
    n_clusters_per_class=1,
    weights=[0.99],
    flip_y=0,
    random_state=0,
)

# Hold out half the data for evaluation; stratify so both splits keep the
# same class ratio (essential with such a rare positive class).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=0, stratify=y
)

# Train a plain logistic-regression classifier on the training split.
reglogModel = LogisticRegression(random_state=0)
reglogModel.fit(X_train, y_train)

# Score the test split and keep only the positive-class probability
# (column 1 of predict_proba) for downstream threshold analysis.
y_pred = reglogModel.predict_proba(X_test)[:, 1]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment