Last active
November 21, 2023 05:21
-
-
Save bastosmichael/2042249dc593b8ebdbd5ea9aca0cf026 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from sklearn.model_selection import train_test_split | |
from sklearn.ensemble import RandomForestClassifier | |
from sklearn.metrics import accuracy_score | |
from datetime import datetime, timedelta | |
import random | |
# Function to generate random dates
def generate_random_dates(start_date, end_date, num_dates, rng=None):
    """Draw ``num_dates`` random days, with replacement, from a date range.

    Args:
        start_date: First candidate day; anything ``pd.date_range`` accepts.
        end_date: Last candidate day (the range includes it).
        num_dates: How many dates to draw.
        rng: Optional ``random.Random``-like object providing ``choices``.
            When omitted, the module-level ``random`` generator is used, so
            results are only reproducible if the caller seeds ``random``.

    Returns:
        A list of ``datetime.datetime`` objects of length ``num_dates``.

    Raises:
        IndexError: If the range is empty (``end_date`` before
            ``start_date``) and ``num_dates`` is positive.
    """
    source = random if rng is None else rng
    date_range = pd.date_range(start_date, end_date).to_pydatetime().tolist()
    return source.choices(date_range, k=num_dates)
# Generating made-up data.
# Seed BOTH generators: the categorical columns are drawn with NumPy, but the
# three date columns come from generate_random_dates(), which uses the stdlib
# `random` module. Seeding NumPy alone left the dataset (and therefore the
# reported accuracy) different on every run.
np.random.seed(0)
random.seed(0)

num_orders = 100
start_date = "2020-01-01"
end_date = "2023-12-31"

data = {
    # randint's upper bound is exclusive, so vendor ids are 1..9.
    "vendor_id": np.random.randint(1, 10, size=num_orders),
    "order_size": np.random.choice(["small", "medium", "large"], size=num_orders),
    "season": np.random.choice(
        ["winter", "spring", "summer", "autumn"], size=num_orders
    ),
    # The three dates are drawn independently of one another.
    "original_estimated_date": generate_random_dates(start_date, end_date, num_orders),
    "updated_delivery_date": generate_random_dates(start_date, end_date, num_orders),
    "final_receipt_date": generate_random_dates(start_date, end_date, num_orders),
}
df = pd.DataFrame(data)
# Preprocess the data: express each delay as whole days between a promised
# date and the date the order was actually received.
final_receipt = pd.to_datetime(df["final_receipt_date"])
df["original_delay"] = (
    final_receipt - pd.to_datetime(df["original_estimated_date"])
).dt.days
df["updated_delay"] = (
    final_receipt - pd.to_datetime(df["updated_delivery_date"])
).dt.days

# Delay category from the original delay:
#   0 -> at most 60 days, 1 -> 61 to 90 days, 2 -> more than 90 days.
# The stricter threshold has to be tested first; checking `> 60` first made
# the `> 90` branch unreachable, so category 2 was never assigned.
df["delay_category"] = df["original_delay"].apply(
    lambda days: 2 if days > 90 else (1 if days > 60 else 0)
)
# Additional feature engineering: order size becomes an ordinal number,
# season and vendor become one-hot indicator columns.
size_rank = {"small": 1, "medium": 2, "large": 3}
df["order_size"] = df["order_size"].map(size_rank)
df = pd.get_dummies(df, columns=["season", "vendor_id"])

# Splitting the dataset: the label and the raw date columns are excluded from
# the feature matrix; 30% of the rows are held out for testing.
# NOTE(review): "original_delay" stays in X although "delay_category" is
# computed directly from it, so the label is recoverable from a feature.
# Confirm this is intended before trusting the reported accuracy.
non_feature_columns = [
    "delay_category",
    "original_estimated_date",
    "updated_delivery_date",
    "final_receipt_date",
]
y = df["delay_category"]
X = df.drop(columns=non_feature_columns)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
# Training the model: fit() returns the estimator itself, so construction and
# fitting are chained. The fixed random_state keeps the forest repeatable.
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Making predictions on the held-out rows and evaluating them against the
# true labels.
predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print(f"Model Accuracy: {accuracy}")
# Function to prepare and align future order data with training data
def prepare_future_order_data(data, feature_columns):
    """Build a feature frame whose columns match ``feature_columns`` exactly.

    Columns present in ``data`` are copied; columns absent from ``data`` are
    filled with zeros; keys of ``data`` that are not in ``feature_columns``
    are ignored. The result's column order is that of ``feature_columns``.

    Args:
        data: Mapping (or DataFrame) from column name to that column's values.
        feature_columns: Iterable of the column names the model was fit on.

    Returns:
        A ``pandas.DataFrame`` with one column per entry of
        ``feature_columns``. With no list-like input column it has one row.
    """
    provided = {col: data[col] for col in feature_columns if col in data}

    # Size the zero filler to the provided rows. A fixed one-element filler
    # made pandas reject any input that described more than one order.
    lengths = [
        len(values)
        for values in provided.values()
        if pd.api.types.is_list_like(values)
    ]
    n_rows = max(lengths, default=1)

    prepared_data = {
        col: provided[col] if col in provided else [0] * n_rows
        for col in feature_columns
    }
    return pd.DataFrame(prepared_data)
# Example of predicting future orders.
# Describe one hypothetical order using the encoded feature names; any
# training column not listed here is added by prepare_future_order_data().
future_order_data = dict(
    order_size=[2],  # medium
    season_autumn=[0],
    season_spring=[1],
    season_summer=[0],
    season_winter=[0],
    original_delay=[45],  # Assuming 45 days delay based on historical trends
    updated_delay=[30],  # Assuming 30 days delay based on updated info
    vendor_id_2=[1],  # Example vendor_id
)

# Align the order with the columns the model was trained on, then predict its
# delay category.
future_order_df = prepare_future_order_data(future_order_data, X_train.columns)
future_prediction = model.predict(future_order_df)
print(f"Future Order Delay Prediction: {future_prediction}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment