Skip to content

Instantly share code, notes, and snippets.

@kaenova
Created May 3, 2025 06:12
Show Gist options
  • Save kaenova/9ed10683d99e00e65a7918c5f2783f85 to your computer and use it in GitHub Desktop.
Save kaenova/9ed10683d99e00e65a7918c5f2783f85 to your computer and use it in GitHub Desktop.
Markov Chain Learning

Markov Chain Learning

I am learning Markov chains through a use case: given a user's current state, we want to predict which product the user will take up next and what their next state will be. The state is encoded as [current_balance, age_category, product_1, product_2, product_3, product_4], where each product slot holds the order in which the user acquired that product (0 means the product is not owned). For example, [1, 2, 1, 3, 2, 0] means the user has a low balance (elm 0), is a teenager (elm 1), took savings as their first product (elm 2), a credit card as their second product (elm 4), mobile banking as their third product (elm 3), and does not have a deposit (elm 5).

I created 3 files:

  • generator.py generates random user timelines of product adoption
  • markov.py builds the Markov chain transition table from those timelines
  • interact.py lets you query the table interactively

Here is how to use it:

$ ./generator.py 20000
Number of Users: 20000
User Timeline saved to timeline_20250503-125952.pkl

$ ./markov.py timeline_20250503-125952.pkl
Processing user timelines...
Waiting for all tasks to complete...
Processed 1000 user timelines...
Processed 2000 user timelines...
Processed 3000 user timelines...
...
Processed 35000 user timelines...
Processed 36000 user timelines...
Processing done, waiting for update thread to finish...
Processed 37000 user timelines...
Processed 38000 user timelines...
...
Processed 48000 user timelines...
Processed 49000 user timelines...
Update thread finished.
State table saved to markov_20250503-130007.pkl

$ ./interact.py markov_20250503-130007.pkl
Enter the state (delimited by space): 3 2 1 2 0 0
Normalized table for state (3, 2, 1, 2, 0, 0):
State (2, 3, 1, 2, 3, 0): 0.2692307692307692
State (1, 3, 1, 2, 3, 0): 0.23076923076923078
State (2, 2, 1, 2, 3, 0): 0.11538461538461539
State (1, 2, 1, 2, 3, 0): 0.07692307692307693
State (3, 2, 1, 2, 3, 0): 0.07692307692307693
State (1, 2, 1, 2, 0, 3): 0.038461538461538464
State (1, 3, 1, 2, 0, 3): 0.038461538461538464
State (1, 4, 1, 2, 3, 0): 0.038461538461538464
State (2, 2, 1, 2, 0, 3): 0.038461538461538464
State (2, 4, 1, 2, 3, 0): 0.038461538461538464
State (3, 3, 1, 2, 3, 0): 0.038461538461538464

As you can see on

$ ./interact.py markov_20250503-130007.pkl
Enter the state (delimited by space): 3 2 1 2 0 0

Here I want to predict the next product for a user who currently has a high balance, is a teenager, took savings as their first product, and took m-banking as their second product. The results are:

State (2, 3, 1, 2, 3, 0): 0.2692307692307692
State (1, 3, 1, 2, 3, 0): 0.23076923076923078
State (2, 2, 1, 2, 3, 0): 0.11538461538461539
State (1, 2, 1, 2, 3, 0): 0.07692307692307693
State (3, 2, 1, 2, 3, 0): 0.07692307692307693
State (1, 2, 1, 2, 0, 3): 0.038461538461538464
State (1, 3, 1, 2, 0, 3): 0.038461538461538464
State (1, 4, 1, 2, 3, 0): 0.038461538461538464
State (2, 2, 1, 2, 0, 3): 0.038461538461538464
State (2, 4, 1, 2, 3, 0): 0.038461538461538464
State (3, 3, 1, 2, 3, 0): 0.038461538461538464

The state (2, 3, 1, 2, 3, 0) has the highest probability, so the user's next product is most likely a credit card (elm 4, which becomes their 3rd product).

#! python
import itertools
import random
from concurrent.futures import ThreadPoolExecutor
import sys
import pickle
import datetime
# Meta Data
# Allowed values for the balance and age ("umur") components of a state.
balance = [1, 2, 3]
umur = [1, 2, 3, 4]

# Features
# One product slot per name; the list length fixes the number of slots.
feature = ["tabungan", "m-banking", "cc", "deposito"]
num_of_features = len(feature)

# Each product slot holds a value from 0 to num_of_features.
feature_state = list(range(num_of_features + 1))

# Every (balance, umur, slot_1, ..., slot_n) tuple, in itertools.product order.
combinations = list(
    itertools.product(balance, umur, *([feature_state] * num_of_features))
)
def generate_random_num_of_features():
    """Pick how many features a simulated user ends up using.

    Returns:
        An integer chosen at random from 1 to ``num_of_features`` inclusive.
    """
    candidate_counts = range(1, num_of_features + 1)
    return random.choice(candidate_counts)
def get_number_of_state_features(num_features):
    """Return the number of states in a timeline that adopts ``num_features``.

    The result is one more than ``num_features`` because the timeline also
    contains the initial state.
    """
    initial_state_count = 1
    return num_features + initial_state_count
def generate_random_balance(num_faetures_state):
    """Draw one balance value per state of a timeline.

    Args:
        num_faetures_state: Number of states, i.e. how many values to draw.

    Returns:
        A list of ``num_faetures_state`` values, each chosen independently
        from the module-level ``balance`` list.
    """
    return [random.choice(balance) for _ in range(num_faetures_state)]
def generate_random_umur(num_faetures_state):
    """Draw one age ("umur") value per state of a timeline.

    The first value is chosen from the whole module-level ``umur`` list. Each
    later value is chosen from the slice of ``umur`` that starts at index
    ``previous_value - 1``.

    Args:
        num_faetures_state: Number of states in the timeline.

    Returns:
        A list holding the first draw plus ``num_faetures_state - 1`` further
        draws, so it always contains at least one value.
    """
    current_age = random.choice(umur)
    ages = [current_age]
    for _ in range(num_faetures_state - 1):
        # Restrict the next draw to the entries from the current one onward.
        current_age = random.choice(umur[current_age - 1:])
        ages.append(current_age)
    return ages
def generate_random_features_sequence(num_featuers, total_features):
    """Build the step-by-step feature states of one random timeline.

    Feature numbers ``1..num_featuers`` are shuffled into a random adoption
    order. Starting from an all-zero state of length ``total_features``, each
    step writes the step number (1-based) into the slot of the feature
    adopted at that step and records a snapshot.

    Args:
        num_featuers: How many features are adopted. The adopted features are
            always numbers 1..num_featuers; only their order is random.
        total_features: Length of every state list.

    Returns:
        A list of ``num_featuers + 1`` independent lists: the all-zero
        initial state followed by one snapshot per adoption step.
    """
    adoption_order = list(range(1, num_featuers + 1))
    random.shuffle(adoption_order)

    current = [0] * total_features
    snapshots = [current.copy()]
    for rank, feature_number in enumerate(adoption_order, start=1):
        # Feature numbers are 1-based; slots are 0-based.
        current[feature_number - 1] = rank
        snapshots.append(current.copy())
    return snapshots
def combine_state_features(sequence_step, random_balance, random_umur):
    """Prefix every feature state with its balance and age value.

    Args:
        sequence_step: List of feature-state lists, one per step.
        random_balance: Balance values, indexed by step.
        random_umur: Age values, indexed by step.

    Returns:
        A list with one ``[balance, umur, *features]`` list per entry of
        ``sequence_step``.
    """
    return [
        [random_balance[position], random_umur[position]] + features
        for position, features in enumerate(sequence_step)
    ]
def random_timeline(_):
    """Generate one random user timeline.

    The single argument is ignored; it only exists so the function can be
    mapped over an iterable.

    Returns:
        A list of combined states (balance, umur, then the feature slots),
        one per state of the timeline.
    """
    feature_count = generate_random_num_of_features()
    state_count = get_number_of_state_features(feature_count)

    balances = generate_random_balance(state_count)
    ages = generate_random_umur(state_count)
    feature_steps = generate_random_features_sequence(feature_count, num_of_features)

    return combine_state_features(feature_steps, balances, ages)
def generate_many_users_timeline(num_users):
    """Generate ``num_users`` random user timelines.

    Timelines are built sequentially. Each one is a few microseconds of pure
    Python work, so a thread pool (previously sized at 5000 workers) gives no
    speed-up under the GIL and only adds thread start-up cost and the risk of
    hitting the operating system's thread limit.

    Args:
        num_users: Number of timelines to generate.

    Returns:
        A list of ``num_users`` timelines, in generation order.
    """
    return [random_timeline(user_number) for user_number in range(num_users)]
if __name__ == "__main__":
    # The number of users is the first CLI argument; default is 10_000.
    num_users = 10_000 if len(sys.argv) <= 1 else int(sys.argv[1])

    # Generate the timelines for all users.
    all_users_timeline = generate_many_users_timeline(num_users)
    print(f"Number of Users: {len(all_users_timeline)}")

    # Bundle the timelines with the metadata needed to rebuild the state space.
    current_date_time = datetime.datetime.now()
    stamp = current_date_time.strftime("%Y%m%d-%H%M%S")
    final_data = {
        "num_users": num_users,
        "num_features": num_of_features,
        "all_users_timeline": all_users_timeline,
        "feature_state": feature_state,
        "combinations": combinations,
        "ctime": current_date_time,
    }

    # Save everything to a timestamped pickle file.
    with open(f'timeline_{stamp}.pkl', 'wb') as f:
        pickle.dump(final_data, f)
    print(f"User Timeline saved to timeline_{stamp}.pkl")
#! python
import pickle
import sys
if __name__ == "__main__":
    # Get file name from args.
    # NOTE(review): the fallback names a timeline_*.pkl file, but the
    # 'normalized_table' key read below is written by markov.py, not by
    # generator.py -- confirm the intended default.
    file_name = sys.argv[1] if len(sys.argv) > 1 else 'timeline_20250503-115215.pkl'

    # Load the Markov table from a pickle file.
    # SECURITY: pickle.load can execute arbitrary code; only open files that
    # you produced yourself.
    with open(file_name, 'rb') as f:
        data = pickle.load(f)

    try:
        normalized_table = data['normalized_table']
        combinations = data['combinations']
    except KeyError as missing_key:
        # Fail with a readable message instead of a bare KeyError traceback.
        sys.exit(
            f"{file_name} has no {missing_key} entry; "
            "pass a markov_*.pkl file produced by markov.py"
        )

    # Read the state from the user as space-delimited integers.
    raw_state = input("Enter the state (delimited by space): ")
    try:
        input_state = tuple(map(int, raw_state.split()))
        # A single scan both validates the state and yields its row index.
        index = combinations.index(input_state)
    except ValueError:
        # Raised for non-integer tokens and for states not in combinations.
        print("Invalid state!")
    else:
        # Print the non-zero transition probabilities, highest first.
        print(f"Normalized table for state {input_state}:")
        sorted_values = sorted(
            (
                (state, value)
                for state, value in zip(combinations, normalized_table[index])
                if value != 0
            ),
            key=lambda pair: pair[1],
            reverse=True,
        )
        for state, value in sorted_values:
            print(f"State {state}: {value}")
#! python
import pickle
import datetime
import sys
from queue import Queue
from concurrent.futures import ThreadPoolExecutor
from threading import Thread
def build_state_table(combinations):
    """Create a square table of zero counts sized by ``len(combinations)``.

    Returns:
        A list of ``len(combinations)`` independent rows, each a list of
        ``len(combinations)`` zeros.
    """
    size = len(combinations)
    return [[0] * size for _row in range(size)]
def process_user_timeline(user_timeline, queue):
    """Put every consecutive pair of states from one timeline on ``queue``.

    Each queued item is a dict with the earlier state under ``"before"`` and
    the following state under ``"after"``. Timelines with fewer than two
    states enqueue nothing.
    """
    for earlier, later in zip(user_timeline, user_timeline[1:]):
        queue.put({"before": earlier, "after": later})
def update_state_table_from_queue(combinations, state_table, user_timeline_queue: 'Queue', is_done=None):
    """Consume queued transitions and count them in ``state_table``.

    Every queue item is a dict with ``'before'`` and ``'after'`` states. For
    each item the cell ``state_table[index(before)][index(after)]`` is
    incremented, where ``index`` is the state's position in ``combinations``.

    The loop ends once ``is_done[0]`` is true and the queue has been drained.
    The queue is polled with a timeout rather than a blocking ``get()``, so
    the consumer cannot hang if the flag is set while it waits on an empty
    queue.

    Args:
        combinations: Sequence of all states; its order defines the table's
            row and column indices.
        state_table: Square list-of-lists of counts, updated in place.
        user_timeline_queue: Queue of ``{'before': ..., 'after': ...}`` dicts.
        is_done: One-element list used as a shared flag. The producer side
            sets ``is_done[0] = True`` after the last item is enqueued. When
            omitted, no completion is ever signalled and the loop keeps
            polling.

    Raises:
        ValueError: If a queued state is not present in ``combinations``.
    """
    # Local import: the file's import block only brings in Queue.
    from queue import Empty

    if is_done is None:
        is_done = [False]

    poll_seconds = 0.1

    # Build the state -> index lookup once instead of scanning the list twice
    # per transition. setdefault keeps the first occurrence, like list.index.
    state_index = {}
    for position, state in enumerate(combinations):
        state_index.setdefault(tuple(state), position)

    def index_of(state):
        key = tuple(state)
        try:
            return state_index[key]
        except KeyError:
            # Keep the exception type that list.index raised before.
            raise ValueError(f"{key!r} is not in combinations") from None

    i = 0
    while True:
        # Read the flag BEFORE polling: if it was already set and the queue is
        # empty now, every item has been consumed.
        finished = is_done[0]
        try:
            # Once finished, do a non-blocking get (the timeout is ignored).
            transition = user_timeline_queue.get(block=not finished, timeout=poll_seconds)
        except Empty:
            if finished:
                break
            continue

        idx_before = index_of(transition['before'])
        idx_after = index_of(transition['after'])
        state_table[idx_before][idx_after] += 1

        # The counter counts transitions, which is what the message reports.
        i += 1
        if i % 1000 == 0:
            print(f"Processed {i} user timelines...")
def normalize_state_table(state_table):
    """Return a row-normalized copy of ``state_table``.

    Each row whose sum is positive is divided by that sum. Any other row
    becomes a row of ``0.0`` with length ``len(state_table)``. The input
    table is not modified.
    """
    size = len(state_table)
    normalized = []
    for counts in state_table:
        row_total = sum(counts)
        if row_total > 0:
            normalized.append([count / row_total for count in counts])
        else:
            # No observed transitions from this state.
            normalized.append([0.0] * size)
    return normalized
if __name__ == "__main__":
    # Get file name from args
    file_name = sys.argv[1] if len(sys.argv) > 1 else 'timeline_20250503-115215.pkl'

    # Load the user timelines from a pickle file.
    # SECURITY: pickle.load can execute arbitrary code; only open files that
    # you produced yourself.
    with open(file_name, 'rb') as f:
        data = pickle.load(f)
    combinations = data['combinations']
    users_timeline = data['all_users_timeline']

    # Build the (zeroed) transition-count table.
    state_table = build_state_table(combinations)
    # Queue carrying individual before/after transitions to the consumer.
    user_timeline_queue = Queue()
    # One-element list used as a shared "producers are finished" flag.
    done_processing = [False]
    # A single consumer thread owns all writes to the state table.
    update_thread = Thread(
        target=update_state_table_from_queue,
        args=(combinations, state_table, user_timeline_queue, done_processing),
    )

    # Use the default pool size. The tasks are tiny and CPU-bound, so the
    # previous 10000-worker pool only added thread overhead.
    with ThreadPoolExecutor() as executor:
        print("Processing user timelines...")
        for user_timeline in users_timeline:
            executor.submit(process_user_timeline, user_timeline, user_timeline_queue)
        update_thread.start()
        print("Waiting for all tasks to complete...")
    # Leaving the with-block waits for every submitted task, so all
    # transitions are enqueued before the flag is set.
    done_processing[0] = True
    print("Processing done, waiting for update thread to finish...")

    # Wait for the update thread to drain the queue.
    update_thread.join()
    print("Update thread finished.")

    # Turn the counts into per-row probabilities.
    normalized_table = normalize_state_table(state_table)

    # Prepare the state table for saving.
    current_date_time = datetime.datetime.now()
    stamp = current_date_time.strftime("%Y%m%d-%H%M%S")
    data = {
        "timeline_file": file_name,
        "combinations": combinations,
        "state_table": state_table,
        "normalized_table": normalized_table,
        "ctime": current_date_time,
    }

    # Save the state table to a timestamped pickle file.
    with open(f'markov_{stamp}.pkl', 'wb') as f:
        pickle.dump(data, f)
    print(f"State table saved to markov_{stamp}.pkl")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment