Skip to content

Instantly share code, notes, and snippets.

@erik4github
Last active February 3, 2025 18:49
Show Gist options
  • Save erik4github/c49ce42496fcf607f38cf86b3c32bb04 to your computer and use it in GitHub Desktop.
Save erik4github/c49ce42496fcf607f38cf86b3c32bb04 to your computer and use it in GitHub Desktop.
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
# -----------------------------------------------------
# Step 1: Generate Sample Data
# -----------------------------------------------------
# Ten consecutive calendar days starting on 2025-01-01.
start_date = datetime(2025, 1, 1)
dates = [start_date + timedelta(days=i) for i in range(10)]

data = []
np.random.seed(0)  # Fixed seed so every run yields the same sample.
for date in dates:
    # Between 50 and 100 rows per day (randint's upper bound is exclusive).
    num_rows = np.random.randint(50, 101)
    for _ in range(num_rows):
        # Simulate a product number as a random 7-digit number.
        product_number = np.random.randint(1000000, 10000000)
        # Random time-of-day offset added to the day's midnight.
        # The draw order (product, hours, minutes, seconds) determines the
        # seeded sample, so it must not be rearranged.
        random_time = timedelta(
            hours=np.random.randint(0, 24),
            minutes=np.random.randint(0, 60),
            seconds=np.random.randint(0, 60),
        )
        timestamp = date + random_time
        data.append({'timestamp': timestamp, 'product_number': product_number})

# One row per simulated sale: columns 'timestamp' and 'product_number'.
df = pd.DataFrame(data)

# Normalise 'timestamp' to datetime64 and keep the calendar day separately
# so rows can be grouped per date.
df['timestamp'] = pd.to_datetime(df['timestamp'])
df['date'] = df['timestamp'].dt.date

# Last two digits of the product number as a two-character string.
# zfill(2) pads values shorter than two characters (e.g. 5 -> '05') so every
# key has the same width; for the 7-digit sample this equals str[-2:].
df['last_two'] = df['product_number'].astype(str).str.zfill(2).str[-2:]
# -----------------------------------------------------
# Step 2: Compute Least Frequent Last Two Digits Per Date
# -----------------------------------------------------
# Count rows per (date, last-two-digit pair): one row per date, one column per
# pair. A pair that did not occur on a given day gets a count of 0.
pivot = df.pivot_table(index='date', columns='last_two', aggfunc='size', fill_value=0)

# pivot_table only creates columns for pairs seen at least once in the data.
# Add the remaining pairs from '00'..'99' with zero counts so a pair that never
# occurred is still considered when looking for the least frequent one.
# union() keeps any existing column that is not a two-digit pair.
all_pairs = pd.Index([f"{n:02d}" for n in range(100)], name=pivot.columns.name)
pivot = pivot.reindex(columns=pivot.columns.union(all_pairs), fill_value=0)

# For each date, record the minimum count and every pair that attains it.
least_frequent_per_day = {}
for date, row in pivot.iterrows():
    min_count = row.min()
    # There might be more than one pair with the same minimum frequency.
    least_digits = row[row == min_count].index.tolist()
    least_frequent_per_day[date] = {'min_count': min_count, 'digits': least_digits}

print("Least frequent last two digits for each date:")
for date, info in least_frequent_per_day.items():
    print(f"Date: {date} -> Least frequent digits: {info['digits']} (Count: {info['min_count']})")
# -----------------------------------------------------
# Step 3: Compute Overall Least Frequent Last Two Digit Based on Averages
# -----------------------------------------------------
# Mean daily count of every last-two-digit pair, taken down each column of the
# pivot table. Days on which a pair did not occur contribute a 0 to its mean.
avg_frequency = pivot.mean(axis=0)

# Smallest mean, and every pair whose mean equals it (ties are all kept).
min_avg = avg_frequency.min()
least_frequent_overall = avg_frequency.loc[avg_frequency.eq(min_avg)].index.tolist()

print(
    "\nLeast frequent last two digits based on average frequency across dates:",
    f"{least_frequent_overall} with an average count of {min_avg:.2f}",
    sep="\n",
)
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
# -----------------------------
# Step 1: Generate sample data
# -----------------------------
# Ten consecutive calendar days starting on 2025-01-01.
start_date = datetime(2025, 1, 1)
dates = [start_date + timedelta(days=i) for i in range(10)]

# For each date, generate a random number of rows (between 50 and 100).
data = []
np.random.seed(0)  # Fixed seed so every run yields the same sample.
for date in dates:
    num_rows = np.random.randint(50, 101)  # upper bound is exclusive
    for _ in range(num_rows):
        # Simulate a product number as a random 7-digit number.
        product_number = np.random.randint(1000000, 10000000)
        # Random time-of-day offset added to the day's midnight.
        # The draw order (product, hours, minutes, seconds) determines the
        # seeded sample, so it must not be rearranged.
        random_time = timedelta(
            hours=np.random.randint(0, 24),
            minutes=np.random.randint(0, 60),
            seconds=np.random.randint(0, 60),
        )
        timestamp = date + random_time
        data.append({'timestamp': timestamp, 'product_number': product_number})

# One row per simulated sale: columns 'timestamp' and 'product_number'.
df = pd.DataFrame(data)

# -----------------------------
# Step 2: Process the DataFrame
# -----------------------------
# Normalise 'timestamp' to datetime64 and keep the calendar day separately
# so rows can be grouped per date.
df['timestamp'] = pd.to_datetime(df['timestamp'])
df['date'] = df['timestamp'].dt.date

# Last two digits of the product number as a two-character string.
# zfill(2) pads values shorter than two characters (e.g. 5 -> '05') so every
# key has the same width; for the 7-digit sample this equals str[-2:].
df['last_two'] = df['product_number'].astype(str).str.zfill(2).str[-2:]
# Define a function to return the minimum frequency count for a given group.
def min_last_two_frequency(group: pd.DataFrame):
    """Return the count of the rarest 'last_two' value present in *group*.

    Only values that actually occur in the group are counted, so for a
    non-empty group the result is at least 1; a pair that is absent from the
    group is not treated as having a count of 0.

    Args:
        group: Rows of one group; must contain a 'last_two' column.

    Returns:
        The smallest number of occurrences among the distinct 'last_two'
        values in the group.
    """
    counts = group['last_two'].value_counts()
    return counts.min()
# Group by date and apply the function to each day's rows.
# Only the 'last_two' column is selected before apply(): the callback reads
# nothing else, and passing the grouping column ('date') into apply() is
# deprecated in recent pandas releases and triggers a warning.
daily_min_freq = df.groupby('date')[['last_two']].apply(min_last_two_frequency)

# Calculate the average of these per-day minimum frequencies.
average_min_freq = daily_min_freq.mean()

# Print the intermediate daily minimum frequencies and the overall average.
print("Daily minimum frequencies (least common last two digits count per day):")
print(daily_min_freq)
print("\nThe average least frequency occurring (per day) is:", average_min_freq)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment