Skip to content

Instantly share code, notes, and snippets.

@vinayakg
Created February 13, 2024 10:45
Show Gist options
  • Save vinayakg/13b05a3ae73a3263faca42d9b4c2762e to your computer and use it in GitHub Desktop.
Save vinayakg/13b05a3ae73a3263faca42d9b4c2762e to your computer and use it in GitHub Desktop.
Data Generator python
import itertools
import psycopg2
import json
import random
import csv
import time
import sys
import datetime
import pytz
sys.stdout = open('/home/ace-capp-apim-vm-dev-user/ma_mockdata/asset_sales/logger/pf_output_log.txt', 'a')
sub_product = ['Tadawul', 'Al_Khair', 'Express_Finance', 'Salary_Advance', 'Topup', 'Others']
channel = ['branch']
sub_channel = ['branch-1', 'branch-2', 'branch-3', 'branch-4', 'branch-5', 'branch-6', 'branch-7',
'branch-8', 'branch-9', 'branch-10', 'branch-11', 'branch-12', 'branch-13', 'branch-14',
'branch-15', 'branch-16', 'branch-17', 'branch-18', 'branch-19', 'branch-20', 'branch-21',
'branch-22', 'branch-23', 'branch-24', 'branch-25', 'branch-26', 'branch-27', 'branch-28',
'branch-29', 'branch-30', 'branch-31', 'branch-32', 'branch-33', 'branch-34', 'branch-35',
'branch-36', 'branch-37', 'branch-38', 'branch-39', 'branch-40', 'branch-41', 'branch-42',
'branch-43', 'branch-44', 'branch-45', 'branch-46', 'branch-47', 'branch-48', 'branch-49',
'dsu-1', 'dsu-2', 'dsu-3', 'dsu-4', 'dsu-5', 'dsu-6', 'dsu-7',
'dsu-8', 'dsu-9', 'dsu-10', 'dsu-11', 'dsu-12', 'dsu-13', 'dsu-14',
'dsu-15', 'dsu-16', 'dsu-17', 'dsu-18', 'dsu-19', 'dsu-20', 'dsu-21',
'dsu-22', 'dsu-23', 'dsu-24', 'dsu-25', 'dsu-26', 'dsu-27', 'dsu-28', 'dsu-29', 'dsu-30', 'DTS']
customer_segments = ['priority_plus', 'gold', 'diamond', 'mass', 'rise', 'bbd', 'private_banking']
nationalities = ['uae_national', 'arab', 'asian', 'western', 'others']
emirates = ['abu_dhabi', 'dubai', 'sharjah', 'northern_emirates']
salary_brackets = ['NA']
age_brackets = ['NA']
#salary_brackets = ['less_than_5k', '5k_to_10k', '10k_to_20k', '20k_to_40k', '40k_to_80k', 'more_than_80k', 'NA']
#age_brackets = ['Age_less_than_21', 'age_btwn_21_to_30', 'age_btwn_31_to_40', 'age_btwn_41_to_50', 'age_btwn_51_to_60', 'age_greater_than_60', 'NA']
combinations = list(itertools.product(sub_product, channel, sub_channel, customer_segments, nationalities, emirates, salary_brackets, age_brackets))
# Replace these with your PostgreSQL database connection details
db_params = {
'host': '172.24.92.84',
'database': 'acedevdb',
'user': 'acedevadmin',
'password': 'dbAcedev2023',
'port': '5432'
}
# Define the table name
table_name = 'bu_asset_sales_pf'
# Create a connection to the PostgreSQL database
connection = psycopg2.connect(**db_params)
# Create a cursor object to interact with the database
cursor = connection.cursor()
# Create the table if it doesn't exist
create_table_query = f'''
CREATE TABLE IF NOT EXISTS {table_name} (
id SERIAL PRIMARY KEY,
sub_product VARCHAR(255),
channel VARCHAR(255),
sub_channel VARCHAR(255),
customer_segment VARCHAR(255),
nationality VARCHAR(255),
emirate VARCHAR(255),
salary_bracket VARCHAR(255),
age_bracket VARCHAR(255),
mtd INTEGER[],
month_end INTEGER[],
ytd INTEGER[],
year_end INTEGER[],
mtd_target INTEGER[],
ytd_target INTEGER[],
month_end_target INTEGER[],
year_end_target INTEGER[],
ingestion_timestamp TIMESTAMP
);
'''
cursor.execute(create_table_query)
print("table created")
# counter = 0
# Insert records into the table
insert_query = f'''
INSERT INTO {table_name} (sub_product, channel, sub_channel, customer_segment, nationality, emirate, salary_bracket, age_bracket,
mtd, month_end, ytd, year_end, mtd_target, ytd_target, month_end_target, year_end_target, ingestion_timestamp)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
'''
# Generate JSON value for mtd column
def generate_json(len, n):
return json.dumps({f'm{i}': random.randint(1, n) for i in range(1, len)})
def generate_array(len,n):
array = []
for i in range(0,len):
array.append(random.randint(1, n))
return array
def generate_random_int(x,y):
return random.randint(x, y)
def generate_gst_timestamp():
# Get the current UTC time
utc_now = datetime.datetime.utcnow()
# Define the UTC time zone
utc_timezone = pytz.timezone('UTC')
utc_now = utc_timezone.localize(utc_now)
# Define the Gulf Standard Time (GST) time zone
gst_timezone = pytz.timezone('Asia/Dubai')
# Convert the UTC time to GST
gst_time = utc_now.astimezone(gst_timezone)
# Format the timestamp as a string
return gst_time.strftime('%Y-%m-%d %H:%M:%S')
chunk_size = 300
combination_length = len(combinations)
iterations = combination_length / chunk_size
counter = 0
print("combination length "+str(combination_length))
print("Expected iteration "+str(iterations))
for i in range(0, len(combinations), chunk_size):
sys.stdout = open('/home/ace-capp-apim-vm-dev-user/ma_mockdata/asset_sales/pf_output_log.txt', 'a')
chunk = combinations[i:i + chunk_size]
counter += 1
records_to_insert = []
for record in chunk:
new_record = record + (
generate_array(24, 50),
generate_array(24, 80),
generate_array(24, 150),
generate_array(24, 200),
generate_array(24, 100),
generate_array(24, 250),
generate_array(24, 10),
generate_array(24, 10),
generate_gst_timestamp()
)
records_to_insert.append(new_record)
#print("length of record_to_insert"+str(len(records_to_insert)))
start_time = time.time()
cursor.executemany(insert_query, records_to_insert)
end_time = time.time()
elapsed_time = end_time - start_time
print("chunk inserted : "+str(counter)+" start time: "+str(start_time)+" Time taken: "+str(elapsed_time))
print("Iteration remaining "+str(iterations-counter))
connection.commit()
sys.stdout.close()
connection.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment