Created
June 6, 2022 08:17
-
-
Save dubeyji10/5297a404aad7319bb87f176c89c72b32 to your computer and use it in GitHub Desktop.
sharded JSONs of 100 records each - clients table
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Announce the paging strategy up front: the script walks the table in steps of 100.
print('iterate with index - steps of 100')
import mysql.connector | |
from mysql.connector import Error | |
import logging | |
import re | |
import time | |
from datetime import datetime , timedelta | |
import json | |
# --- Run setup: timestamped log file, paging parameters, validation regex,
# --- and the base SELECT statements that get a LIMIT/OFFSET suffix per page.
now = datetime.now()
fileName = now.strftime('%Y_%m_%d_%H_%M_%S')
# Windows-style path: the logs\ directory must already exist and be writable.
logging.basicConfig(filename="logs\\shardingJSONS_2_"+fileName+"_LOGS.log", level=logging.INFO, datefmt='%Y-%m-%d %H:%M:%S')
startTime = now
print('script started at : {}'.format(startTime.strftime('%Y-%m-%d %H:%M:%S')))
logging.info('script started at : {}'.format(startTime.strftime('%Y-%m-%d %H:%M:%S')))
print('creating shards of huge amount of data\n')

# Paging parameters: fetch `sizeOfShard` rows per query, walking offsets
# from `index` (inclusive) up to `lastIndex` (exclusive).
sizeOfShard = 100
index = 0
counter = 0
#lastIndex = 479,300
lastIndex = 80000 # test with clients table ~ around 80k records

# A phone-number string is treated as corrupt if it contains any letter or
# special character. FIX: the original pattern r"[a-z|A-Z]|[!@#$%^&*/]" placed
# '|' inside a character class, where it is a literal bar (not alternation),
# so a stray '|' in the digits also tripped the check; the intent was [a-zA-Z].
alpha = re.compile(r"[a-zA-Z!@#$%^&*/]")

# Base SELECTs. Only `clients` is used in this script - the others appear to
# be leftovers for the sibling table-sharding scripts.
clients = 'select * from clients'
leads = 'select * from leads'
conversation = 'select * from conversation'
invoiceInfo = 'select * from invoice_info'
invoiceItems = 'select * from invoice_items'
#
# if query gives null response add condition
#
# try an outrage query
# build condtion logic for that response
#
def _iso_ist(ts):
    """Convert a 'YYYY-MM-DD HH:MM:SS' string to ISO-8601 with the IST (+05:30) offset.

    NULL columns arrive here as the 4-char string 'None' (via str()), so
    anything shorter than 5 characters is treated as "no timestamp" -> None.
    """
    if ts is None or len(ts) < 5:
        return None
    return ts.replace(' ', 'T') + '+05:30'


# Page through the clients table in LIMIT/OFFSET windows of `sizeOfShard`
# rows; each non-empty page becomes one JSON shard file
# (clients/clients_index_<offset>.json) with the rows mapped to dicts.
for i in range(index, lastIndex, sizeOfShard):
    print(' ------------- {} -------------- '.format(counter))
    logging.info(' ------------- {} -------------- '.format(counter))
    print('{} -> limit {} OFFSET {}'.format(counter, sizeOfShard, i))
    offSet = ' LIMIT {} OFFSET {} '.format(sizeOfShard, i)
    # NOTE(review): connect() is called with no credentials - presumably they
    # were stripped before sharing; a real run needs host/user/password/db here.
    # Reconnecting on every page is also wasteful; the connection could be
    # hoisted above the loop.
    with mysql.connector.connect() as connectPointer:
        with connectPointer.cursor() as aCursor:
            query = clients + offSet
            print('running - ', query)
            print('1. fetching clients')
            logging.info('\t1. fetching clients : {}'.format(query))
            aCursor.execute(query)
            result = aCursor.fetchall()
            if not result:
                # Empty page: we've walked past the end of the table.
                print('--> no clients left in table <--')
                logging.info('__________ no clients created left to write __________')
            else:
                print('writing clients to json')
                client_collection_list = []
                for rows in result:
                    # Column map: 0=id, 1/2=contact persons, 3=name (NOT NULL),
                    # 4=grade, 5=phone, 6/7=mobiles, 8=email (not exported),
                    # 9=designation (not exported), 10=address, 11=website,
                    # 12=user_id, 13=added_on, 14=last conversation,
                    # 15=followup_on, 17=last_mail_on, 19=last_sms_on,
                    # 21=country.
                    client_collection = {}
                    # str() so the regex check works on NULL (-> 'None') too.
                    phoneNo = str(rows[5])
                    if not bool(re.search(alpha, phoneNo)):
                        client_collection['phone_no'] = phoneNo
                    else:
                        # Letters/special chars mean a corrupt DB value - drop it.
                        client_collection['phone_no'] = None
                    client_collection['Name'] = rows[3]
                    client_collection['company_name'] = rows[3]
                    client_collection['user_id'] = rows[12]
                    client_collection['local_id'] = rows[0]
                    # FIX: added_on previously skipped the null-guard the other
                    # timestamps got, emitting 'None+05:30' for NULL values.
                    client_collection['added_on'] = _iso_ist(str(rows[13]))
                    client_collection['followup_on'] = _iso_ist(str(rows[15]))
                    client_collection['last_mail_on'] = _iso_ist(str(rows[17]))
                    client_collection['last_sms_on'] = _iso_ist(str(rows[19]))
                    client_collection['company_grade'] = rows[4]
                    client_collection['website'] = rows[11]
                    client_collection['last_conversation'] = rows[14]
                    client_collection['country'] = rows[21]
                    # Strip non-alphanumerics: raw addresses contain characters
                    # that broke JSON validation downstream.
                    client_collection['address'] = re.sub('[^A-Za-z0-9]', ' ', str(rows[10]))
                    client_collection['mobile_no1'] = rows[6]
                    client_collection['mobile_no2'] = rows[7]
                    client_collection['contact_person1'] = rows[1]
                    client_collection['contact_person2'] = rows[2]
                    client_collection_list.append(client_collection)
                # One shard file per page, keyed by the page's starting offset.
                clientsFile = 'clients/clients_index_{}.json'.format(i)
                with open(clientsFile, 'w', encoding='utf-8') as clientJson:
                    clientJson.write(json.dumps(
                        {"data": client_collection_list}, indent=4, default=str, sort_keys=True
                    ))
                print(f' . .. check {clientsFile}')
                logging.info(f'\t\t . .. check {clientsFile}')
    time.sleep(2)  # throttle between pages so the DB is not hammered back-to-back
    counter += 1
# Record and report the moment the run finished (console + log file).
endTime = datetime.now()
now = endTime
finishedAt = endTime.strftime('%Y-%m-%d %H:%M:%S')
print('script ended at : {}'.format(finishedAt))
logging.info('script ended at : {}'.format(finishedAt))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment