Last active
February 3, 2025 18:49
-
-
Save erik4github/c49ce42496fcf607f38cf86b3c32bb04 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# -----------------------------------------------------
# Step 1: Generate Sample Data
# -----------------------------------------------------
# Create a date range for 10 days.
start_date = datetime(2025, 1, 1)
dates = [start_date + timedelta(days=i) for i in range(10)]

data = []
np.random.seed(0)  # For reproducibility
for day in dates:
    # For each date, generate a random number of rows (between 50 and 100).
    num_rows = np.random.randint(50, 101)
    for _ in range(num_rows):
        # Simulate a product number as a random 7-digit number.
        product_number = np.random.randint(1000000, 10000000)
        # Create a random time offset for the day.
        random_time = timedelta(
            hours=np.random.randint(0, 24),
            minutes=np.random.randint(0, 60),
            seconds=np.random.randint(0, 60),
        )
        data.append({
            'timestamp': day + random_time,
            'product_number': product_number,
        })

# Create a DataFrame.
df = pd.DataFrame(data)

# Convert 'timestamp' to datetime and extract just the date.
df['timestamp'] = pd.to_datetime(df['timestamp'])
df['date'] = df['timestamp'].dt.date

# Extract the last two digits from the product number.
# Modulo + zero-padding always yields a two-character key ("05", not "5"),
# so short product numbers cannot create stray one-character categories.
df['last_two'] = (df['product_number'] % 100).astype(str).str.zfill(2)

# -----------------------------------------------------
# Step 2: Compute Least Frequent Last Two Digits Per Date
# -----------------------------------------------------
# Every possible last-two-digit pair, "00" through "99".
ALL_PAIRS = pd.Index([f"{n:02d}" for n in range(100)], name='last_two')

# Build a table where each row is a date and each column a last-two-digit pair.
# The cell values are the counts (frequency) for that day.
# A plain cross-tabulation only has columns for pairs that occur at least once
# somewhere in the data; reindexing against ALL_PAIRS adds the pairs that never
# occur at all, with a count of 0, so they are not silently left out of the minima.
pivot = pd.crosstab(df['date'], df['last_two']).reindex(columns=ALL_PAIRS, fill_value=0)

# For each date, determine the minimum count and the corresponding last-two-digit pair(s).
least_frequent_per_day = {}
for day, row in pivot.iterrows():
    min_count = row.min()
    # There might be more than one pair with the same minimum frequency.
    least_digits = row[row == min_count].index.tolist()
    least_frequent_per_day[day] = {'min_count': int(min_count), 'digits': least_digits}

print("Least frequent last two digits for each date:")
for day, info in least_frequent_per_day.items():
    print(f"Date: {day} -> Least frequent digits: {info['digits']} (Count: {info['min_count']})")

# -----------------------------------------------------
# Step 3: Compute Overall Least Frequent Last Two Digit Based on Averages
# -----------------------------------------------------
# Compute the average frequency for each last-two-digit pair across all dates.
# This includes days when a particular pair did not occur (count = 0).
avg_frequency = pivot.mean(axis=0)

# Find the minimum average frequency.
min_avg = avg_frequency.min()

# Identify the digit pair(s) that achieve this minimum average frequency.
least_frequent_overall = avg_frequency[avg_frequency == min_avg].index.tolist()

print("\nLeast frequent last two digits based on average frequency across dates:")
print(f"{least_frequent_overall} with an average count of {min_avg:.2f}")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# -----------------------------
# Step 1: Generate sample data
# -----------------------------
# Create a date range for, say, 10 days.
start_date = datetime(2025, 1, 1)
dates = [start_date + timedelta(days=i) for i in range(10)]

# For each date, generate a random number of rows (between 50 and 100).
data = []
np.random.seed(0)  # For reproducibility
for day in dates:
    num_rows = np.random.randint(50, 101)
    for _ in range(num_rows):
        # Simulate a product number as a random 7-digit number.
        product_number = np.random.randint(1000000, 10000000)
        # Random time offset for the day.
        random_time = timedelta(
            hours=np.random.randint(0, 24),
            minutes=np.random.randint(0, 60),
            seconds=np.random.randint(0, 60),
        )
        data.append({
            'timestamp': day + random_time,
            'product_number': product_number,
        })

# Create a DataFrame.
df = pd.DataFrame(data)

# -----------------------------
# Step 2: Process the DataFrame
# -----------------------------
# Convert the timestamp column to datetime and extract the date.
df['timestamp'] = pd.to_datetime(df['timestamp'])
df['date'] = df['timestamp'].dt.date

# Extract the last two digits from the product number.
# Modulo + zero-padding always yields a two-character key ("05", not "5"),
# so short product numbers cannot create stray one-character categories.
df['last_two'] = (df['product_number'] % 100).astype(str).str.zfill(2)
# Define a function to return the minimum frequency count for a given group.
def min_last_two_frequency(group: pd.DataFrame) -> int:
    """Return the count of the least common 'last_two' value in *group*.

    Only values that actually appear in the group are counted, so for a
    non-empty group the result is at least 1; pairs that are absent from the
    group do not contribute a zero.

    Args:
        group: A DataFrame with a 'last_two' column.

    Returns:
        The smallest per-value occurrence count, or 0 when the group has no
        rows (instead of the NaN that ``min()`` of an empty Series gives).
    """
    counts = group['last_two'].value_counts()
    if counts.empty:
        return 0
    return int(counts.min())
# Group by date and apply the function.
# The 'last_two' column is selected explicitly so the grouping key is not part
# of the frames handed to the function; calling apply on the whole frame makes
# pandas >= 2.2 emit a DeprecationWarning about operating on grouping columns.
daily_min_freq = df.groupby('date')[['last_two']].apply(min_last_two_frequency)

# Calculate the average of these minimum frequencies.
average_min_freq = daily_min_freq.mean()

# Print the intermediate daily minimum frequencies and the overall average.
print("Daily minimum frequencies (least common last two digits count per day):")
print(daily_min_freq)
print("\nThe average least frequency occurring (per day) is:", average_min_freq)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment