Created
February 13, 2024 10:45
-
-
Save vinayakg/13b05a3ae73a3263faca42d9b4c2762e to your computer and use it in GitHub Desktop.
Data Generator python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import itertools | |
| import psycopg2 | |
| import json | |
| import random | |
| import csv | |
| import time | |
| import sys | |
| import datetime | |
| import pytz | |
| sys.stdout = open('/home/ace-capp-apim-vm-dev-user/ma_mockdata/asset_sales/logger/pf_output_log.txt', 'a') | |
| sub_product = ['Tadawul', 'Al_Khair', 'Express_Finance', 'Salary_Advance', 'Topup', 'Others'] | |
| channel = ['branch'] | |
| sub_channel = ['branch-1', 'branch-2', 'branch-3', 'branch-4', 'branch-5', 'branch-6', 'branch-7', | |
| 'branch-8', 'branch-9', 'branch-10', 'branch-11', 'branch-12', 'branch-13', 'branch-14', | |
| 'branch-15', 'branch-16', 'branch-17', 'branch-18', 'branch-19', 'branch-20', 'branch-21', | |
| 'branch-22', 'branch-23', 'branch-24', 'branch-25', 'branch-26', 'branch-27', 'branch-28', | |
| 'branch-29', 'branch-30', 'branch-31', 'branch-32', 'branch-33', 'branch-34', 'branch-35', | |
| 'branch-36', 'branch-37', 'branch-38', 'branch-39', 'branch-40', 'branch-41', 'branch-42', | |
| 'branch-43', 'branch-44', 'branch-45', 'branch-46', 'branch-47', 'branch-48', 'branch-49', | |
| 'dsu-1', 'dsu-2', 'dsu-3', 'dsu-4', 'dsu-5', 'dsu-6', 'dsu-7', | |
| 'dsu-8', 'dsu-9', 'dsu-10', 'dsu-11', 'dsu-12', 'dsu-13', 'dsu-14', | |
| 'dsu-15', 'dsu-16', 'dsu-17', 'dsu-18', 'dsu-19', 'dsu-20', 'dsu-21', | |
| 'dsu-22', 'dsu-23', 'dsu-24', 'dsu-25', 'dsu-26', 'dsu-27', 'dsu-28', 'dsu-29', 'dsu-30', 'DTS'] | |
| customer_segments = ['priority_plus', 'gold', 'diamond', 'mass', 'rise', 'bbd', 'private_banking'] | |
| nationalities = ['uae_national', 'arab', 'asian', 'western', 'others'] | |
| emirates = ['abu_dhabi', 'dubai', 'sharjah', 'northern_emirates'] | |
| salary_brackets = ['NA'] | |
| age_brackets = ['NA'] | |
| #salary_brackets = ['less_than_5k', '5k_to_10k', '10k_to_20k', '20k_to_40k', '40k_to_80k', 'more_than_80k', 'NA'] | |
| #age_brackets = ['Age_less_than_21', 'age_btwn_21_to_30', 'age_btwn_31_to_40', 'age_btwn_41_to_50', 'age_btwn_51_to_60', 'age_greater_than_60', 'NA'] | |
| combinations = list(itertools.product(sub_product, channel, sub_channel, customer_segments, nationalities, emirates, salary_brackets, age_brackets)) | |
| # Replace these with your PostgreSQL database connection details | |
| db_params = { | |
| 'host': '172.24.92.84', | |
| 'database': 'acedevdb', | |
| 'user': 'acedevadmin', | |
| 'password': 'dbAcedev2023', | |
| 'port': '5432' | |
| } | |
| # Define the table name | |
| table_name = 'bu_asset_sales_pf' | |
| # Create a connection to the PostgreSQL database | |
| connection = psycopg2.connect(**db_params) | |
| # Create a cursor object to interact with the database | |
| cursor = connection.cursor() | |
| # Create the table if it doesn't exist | |
| create_table_query = f''' | |
| CREATE TABLE IF NOT EXISTS {table_name} ( | |
| id SERIAL PRIMARY KEY, | |
| sub_product VARCHAR(255), | |
| channel VARCHAR(255), | |
| sub_channel VARCHAR(255), | |
| customer_segment VARCHAR(255), | |
| nationality VARCHAR(255), | |
| emirate VARCHAR(255), | |
| salary_bracket VARCHAR(255), | |
| age_bracket VARCHAR(255), | |
| mtd INTEGER[], | |
| month_end INTEGER[], | |
| ytd INTEGER[], | |
| year_end INTEGER[], | |
| mtd_target INTEGER[], | |
| ytd_target INTEGER[], | |
| month_end_target INTEGER[], | |
| year_end_target INTEGER[], | |
| ingestion_timestamp TIMESTAMP | |
| ); | |
| ''' | |
| cursor.execute(create_table_query) | |
| print("table created") | |
| # counter = 0 | |
| # Insert records into the table | |
| insert_query = f''' | |
| INSERT INTO {table_name} (sub_product, channel, sub_channel, customer_segment, nationality, emirate, salary_bracket, age_bracket, | |
| mtd, month_end, ytd, year_end, mtd_target, ytd_target, month_end_target, year_end_target, ingestion_timestamp) | |
| VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s); | |
| ''' | |
| # Generate JSON value for mtd column | |
| def generate_json(len, n): | |
| return json.dumps({f'm{i}': random.randint(1, n) for i in range(1, len)}) | |
| def generate_array(len,n): | |
| array = [] | |
| for i in range(0,len): | |
| array.append(random.randint(1, n)) | |
| return array | |
| def generate_random_int(x,y): | |
| return random.randint(x, y) | |
| def generate_gst_timestamp(): | |
| # Get the current UTC time | |
| utc_now = datetime.datetime.utcnow() | |
| # Define the UTC time zone | |
| utc_timezone = pytz.timezone('UTC') | |
| utc_now = utc_timezone.localize(utc_now) | |
| # Define the Gulf Standard Time (GST) time zone | |
| gst_timezone = pytz.timezone('Asia/Dubai') | |
| # Convert the UTC time to GST | |
| gst_time = utc_now.astimezone(gst_timezone) | |
| # Format the timestamp as a string | |
| return gst_time.strftime('%Y-%m-%d %H:%M:%S') | |
| chunk_size = 300 | |
| combination_length = len(combinations) | |
| iterations = combination_length / chunk_size | |
| counter = 0 | |
| print("combination length "+str(combination_length)) | |
| print("Expected iteration "+str(iterations)) | |
| for i in range(0, len(combinations), chunk_size): | |
| sys.stdout = open('/home/ace-capp-apim-vm-dev-user/ma_mockdata/asset_sales/pf_output_log.txt', 'a') | |
| chunk = combinations[i:i + chunk_size] | |
| counter += 1 | |
| records_to_insert = [] | |
| for record in chunk: | |
| new_record = record + ( | |
| generate_array(24, 50), | |
| generate_array(24, 80), | |
| generate_array(24, 150), | |
| generate_array(24, 200), | |
| generate_array(24, 100), | |
| generate_array(24, 250), | |
| generate_array(24, 10), | |
| generate_array(24, 10), | |
| generate_gst_timestamp() | |
| ) | |
| records_to_insert.append(new_record) | |
| #print("length of record_to_insert"+str(len(records_to_insert))) | |
| start_time = time.time() | |
| cursor.executemany(insert_query, records_to_insert) | |
| end_time = time.time() | |
| elapsed_time = end_time - start_time | |
| print("chunk inserted : "+str(counter)+" start time: "+str(start_time)+" Time taken: "+str(elapsed_time)) | |
| print("Iteration remaining "+str(iterations-counter)) | |
| connection.commit() | |
| sys.stdout.close() | |
| connection.close() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment