erik4github · February 3, 2025 18:49
diff --git a/freq.py b/freq.py
 import pandas as pd
 import numpy as np
 from datetime import datetime, timedelta

 # -----------------------------------------------------
 # Step 1: Generate Sample Data
 # -----------------------------------------------------
 # Ten consecutive calendar days, starting on 1 January 2025.
 start_date = datetime(2025, 1, 1)
 dates = [start_date + timedelta(days=day_offset) for day_offset in range(10)]

 np.random.seed(0)  # Fixed seed so every run draws the same sample.
 data = []

 for date in dates:
    # np.random.randint excludes the upper bound, so this yields 50..100 rows.
    num_rows = np.random.randint(50, 101)
    for _ in range(num_rows):
        # Draw order matters for reproducibility: product number first,
        # then the hour, minute and second of the time-of-day offset.
        product_number = np.random.randint(1_000_000, 10_000_000)  # 7 digits
        hour_part = np.random.randint(0, 24)
        minute_part = np.random.randint(0, 60)
        second_part = np.random.randint(0, 60)
        random_time = timedelta(
            hours=hour_part,
            minutes=minute_part,
            seconds=second_part,
        )
        timestamp = date + random_time
        data.append(dict(timestamp=timestamp, product_number=product_number))

 # Load the generated records into a DataFrame (one row per record).
 df = pd.DataFrame(data)

 # Make sure 'timestamp' is a datetime column before deriving from it.
 df = df.assign(timestamp=pd.to_datetime(df['timestamp']))

 # Derive the calendar date and the trailing two characters of the product
 # number (taken from its string form).
 df = df.assign(
    date=df['timestamp'].dt.date,
    last_two=df['product_number'].astype(str).str.slice(start=-2),
 )

 # -----------------------------------------------------
 # Step 2: Compute Least Frequent Last Two Digits Per Date
 # -----------------------------------------------------
 # Create a pivot table where each row is a date and each column a last-two-digit pair.
 # The cell values are the counts (frequency) for that day. Missing pairs get a count of 0.
 pivot = df.pivot_table(index='date', columns='last_two', aggfunc='size', fill_value=0)

 # pivot_table only creates a column for a pair that occurs at least once in
 # the whole dataset, so a pair that never shows up would be silently left
 # out instead of being counted as 0. Add every pair "00".."99" explicitly;
 # the union keeps any unexpected existing column rather than dropping it.
 all_pairs = [f"{pair:02d}" for pair in range(100)]
 pivot = pivot.reindex(columns=pivot.columns.union(all_pairs), fill_value=0)

 # For each date, determine the minimum count and the corresponding last-two-digit pair(s).
 least_frequent_per_day = {}
 for date, row in pivot.iterrows():
    min_count = row.min()
    # There might be more than one pair with the same minimum frequency.
    least_digits = row[row == min_count].index.tolist()
    least_frequent_per_day[date] = {'min_count': min_count, 'digits': least_digits}

 print("Least frequent last two digits for each date:")
 for date, info in least_frequent_per_day.items():
    print(f"Date: {date} -> Least frequent digits: {info['digits']} (Count: {info['min_count']})")

 # -----------------------------------------------------
 # Step 3: Compute Overall Least Frequent Last Two Digit Based on Averages
 # -----------------------------------------------------
 # Mean daily count of every last-two-digit pair, taken column-wise over the
 # pivot table; zero cells (days without that pair) are part of the mean.
 avg_frequency = pivot.mean()

 # The smallest mean, and every pair that ties for it.
 min_avg = avg_frequency.min()
 is_minimum = avg_frequency == min_avg
 least_frequent_overall = avg_frequency.loc[is_minimum].index.tolist()

 print("\nLeast frequent last two digits based on average frequency across dates:")
 summary_line = f"{least_frequent_overall} with an average count of {min_avg:.2f}"
 print(summary_line)
diff --git a/frequency.py b/frequency.py
 import pandas as pd
 import numpy as np
 from datetime import datetime, timedelta

 # -----------------------------
 # Step 1: Generate sample data
 # -----------------------------
 # Sample window: 10 consecutive days beginning 1 January 2025.
 start_date = datetime(2025, 1, 1)
 dates = [start_date + timedelta(days=day_index) for day_index in range(10)]

 # Seed before any draw so the generated rows are reproducible.
 np.random.seed(0)
 data = []

 for date in dates:
    # Between 50 and 100 rows per day (randint's upper bound is exclusive).
    num_rows = np.random.randint(50, 101)
    for _ in range(num_rows):
        # Keep this draw order (product, hours, minutes, seconds) so the
        # seeded random stream is consumed the same way on every run.
        product_number = np.random.randint(1_000_000, 10_000_000)
        offset_hours = np.random.randint(0, 24)
        offset_minutes = np.random.randint(0, 60)
        offset_seconds = np.random.randint(0, 60)
        random_time = timedelta(
            hours=offset_hours,
            minutes=offset_minutes,
            seconds=offset_seconds,
        )
        timestamp = date + random_time
        record = {'timestamp': timestamp, 'product_number': product_number}
        data.append(record)

 # Load the generated records into a DataFrame (one row per record).
 df = pd.DataFrame(data)

 # -----------------------------
 # Step 2: Process the DataFrame
 # -----------------------------
 # Ensure 'timestamp' is a datetime column, then derive the calendar date
 # and the trailing two characters of the product number's string form.
 df = df.assign(timestamp=pd.to_datetime(df['timestamp']))
 df = df.assign(
    date=df['timestamp'].dt.date,
    last_two=df['product_number'].astype(str).str.slice(start=-2),
 )

 # Define a function to return the minimum frequency count for a given group.
 def min_last_two_frequency(group):
    """Return the count of the rarest 'last_two' value within *group*.

    *group* must expose a 'last_two' column. Only values that actually occur
    in the group are counted, so the result is the smallest observed
    occurrence count.
    """
    # Sorting the counts is unnecessary when only the minimum is needed.
    occurrences = group['last_two'].value_counts(sort=False)
    return occurrences.min()

 # Group by date and apply the function.
 # 'last_two' is selected explicitly so the callback receives only that
 # column: letting groupby.apply operate on the grouping column itself is
 # deprecated since pandas 2.2 and emits a warning. The result is unchanged:
 # one minimum count per date.
 daily_min_freq = df.groupby('date')[['last_two']].apply(min_last_two_frequency)

 # Calculate the average of these minimum frequencies.
 average_min_freq = daily_min_freq.mean()

 # Print the intermediate daily minimum frequencies and the overall average.
 print("Daily minimum frequencies (least common last two digits count per day):")
 print(daily_min_freq)
 print("\nThe average least frequency occurring (per day) is:", average_min_freq)
	import pandas as pd
	import numpy as np
	from datetime import datetime, timedelta

	# -----------------------------------------------------
	# Step 1: Generate Sample Data
	# -----------------------------------------------------
	# Create a date range for 10 days.
	start_date = datetime(2025, 1, 1)
	dates = [start_date + timedelta(days=i) for i in range(10)]

	data = []
	np.random.seed(0)  # For reproducibility

	# The loop bodies below are nested one level per loop; without that
	# nesting the `for` statements have no body and the script cannot parse.
	for date in dates:
		# For each date, generate a random number of rows (between 50 and 100);
		# randint's upper bound is exclusive, hence 101.
		num_rows = np.random.randint(50, 101)
		for _ in range(num_rows):
			# Simulate a product number as a random 7-digit number.
			product_number = np.random.randint(1000000, 10000000)
			# Create a random time offset for the day.
			random_time = timedelta(
				hours=np.random.randint(0, 24),
				minutes=np.random.randint(0, 60),
				seconds=np.random.randint(0, 60)
			)
			timestamp = date + random_time
			data.append({'timestamp': timestamp, 'product_number': product_number})

	# Load the generated records into a DataFrame (one row per record).
	df = pd.DataFrame(data)

	# Make sure 'timestamp' is a datetime column before deriving from it.
	df = df.assign(timestamp=pd.to_datetime(df['timestamp']))

	# Derive the calendar date and the trailing two characters of the product
	# number (taken from its string form).
	df = df.assign(
		date=df['timestamp'].dt.date,
		last_two=df['product_number'].astype(str).str.slice(start=-2),
	)

	# -----------------------------------------------------
	# Step 2: Compute Least Frequent Last Two Digits Per Date
	# -----------------------------------------------------
	# Create a pivot table where each row is a date and each column a last-two-digit pair.
	# The cell values are the counts (frequency) for that day. Missing pairs get a count of 0.
	pivot = df.pivot_table(index='date', columns='last_two', aggfunc='size', fill_value=0)

	# pivot_table only creates a column for a pair that occurs at least once in
	# the whole dataset, so a pair that never shows up would be silently left
	# out instead of being counted as 0. Add every pair "00".."99" explicitly;
	# the union keeps any unexpected existing column rather than dropping it.
	all_pairs = [f"{pair:02d}" for pair in range(100)]
	pivot = pivot.reindex(columns=pivot.columns.union(all_pairs), fill_value=0)

	# For each date, determine the minimum count and the corresponding last-two-digit pair(s).
	# The loop bodies are nested under their `for` lines; without that nesting
	# the script cannot parse.
	least_frequent_per_day = {}
	for date, row in pivot.iterrows():
		min_count = row.min()
		# There might be more than one pair with the same minimum frequency.
		least_digits = row[row == min_count].index.tolist()
		least_frequent_per_day[date] = {'min_count': min_count, 'digits': least_digits}

	print("Least frequent last two digits for each date:")
	for date, info in least_frequent_per_day.items():
		print(f"Date: {date} -> Least frequent digits: {info['digits']} (Count: {info['min_count']})")

	# -----------------------------------------------------
	# Step 3: Compute Overall Least Frequent Last Two Digit Based on Averages
	# -----------------------------------------------------
	# Mean daily count of every last-two-digit pair, taken column-wise over the
	# pivot table; zero cells (days without that pair) are part of the mean.
	avg_frequency = pivot.mean()

	# The smallest mean, and every pair that ties for it.
	min_avg = avg_frequency.min()
	is_minimum = avg_frequency == min_avg
	least_frequent_overall = avg_frequency.loc[is_minimum].index.tolist()

	print("\nLeast frequent last two digits based on average frequency across dates:")
	summary_line = f"{least_frequent_overall} with an average count of {min_avg:.2f}"
	print(summary_line)